ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / fs / jffs2 / gc.c
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright (C) 2001-2003 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@redhat.com>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  * $Id: gc.c,v 1.114 2003/10/09 13:53:35 dwmw2 Exp $
11  *
12  */
13
14 #include <linux/kernel.h>
15 #include <linux/mtd/mtd.h>
16 #include <linux/slab.h>
17 #include <linux/pagemap.h>
18 #include <linux/crc32.h>
19 #include <linux/compiler.h>
20 #include <linux/stat.h>
21 #include "nodelist.h"
22
23 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c, 
24                                           struct jffs2_inode_cache *ic,
25                                           struct jffs2_raw_node_ref *raw);
26 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 
27                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
28 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 
29                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
30 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 
31                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
32 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
33                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
34                                       uint32_t start, uint32_t end);
35 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
36                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
37                                        uint32_t start, uint32_t end);
38 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
39                                struct jffs2_raw_node_ref *raw, struct jffs2_inode_cache *ic);
40
41 /* Called with erase_completion_lock held */
42 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
43 {
44         struct jffs2_eraseblock *ret;
45         struct list_head *nextlist = NULL;
46         int n = jiffies % 128;
47
48         /* Pick an eraseblock to garbage collect next. This is where we'll
49            put the clever wear-levelling algorithms. Eventually.  */
50         /* We possibly want to favour the dirtier blocks more when the
51            number of free blocks is low. */
52         if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
53                 D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
54                 nextlist = &c->bad_used_list;
55         } else if (n < 50 && !list_empty(&c->erasable_list)) {
56                 /* Note that most of them will have gone directly to be erased. 
57                    So don't favour the erasable_list _too_ much. */
58                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
59                 nextlist = &c->erasable_list;
60         } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
61                 /* Most of the time, pick one off the very_dirty list */
62                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
63                 nextlist = &c->very_dirty_list;
64         } else if (n < 126 && !list_empty(&c->dirty_list)) {
65                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
66                 nextlist = &c->dirty_list;
67         } else if (!list_empty(&c->clean_list)) {
68                 D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
69                 nextlist = &c->clean_list;
70         } else if (!list_empty(&c->dirty_list)) {
71                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
72
73                 nextlist = &c->dirty_list;
74         } else if (!list_empty(&c->very_dirty_list)) {
75                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
76                 nextlist = &c->very_dirty_list;
77         } else if (!list_empty(&c->erasable_list)) {
78                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
79
80                 nextlist = &c->erasable_list;
81         } else {
82                 /* Eep. All were empty */
83                 printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n");
84                 return NULL;
85         }
86
87         ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
88         list_del(&ret->list);
89         c->gcblock = ret;
90         ret->gc_node = ret->first_node;
91         if (!ret->gc_node) {
92                 printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
93                 BUG();
94         }
95         
96         /* Have we accidentally picked a clean block with wasted space ? */
97         if (ret->wasted_size) {
98                 D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
99                 ret->dirty_size += ret->wasted_size;
100                 c->wasted_size -= ret->wasted_size;
101                 c->dirty_size += ret->wasted_size;
102                 ret->wasted_size = 0;
103         }
104
105         D1(jffs2_dump_block_lists(c));
106         return ret;
107 }
108
109 /* jffs2_garbage_collect_pass
110  * Make a single attempt to progress GC. Move one node, and possibly
111  * start erasing one eraseblock.
112  */
113 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
114 {
115         struct jffs2_inode_cache *ic;
116         struct jffs2_eraseblock *jeb;
117         struct jffs2_raw_node_ref *raw;
118         uint32_t inum;
119         int ret = 0;
120
121         if (down_interruptible(&c->alloc_sem))
122                 return -EINTR;
123
124         for (;;) {
125                 spin_lock(&c->erase_completion_lock);
126                 if (!c->unchecked_size)
127                         break;
128
129                 /* We can't start doing GC yet. We haven't finished checking
130                    the node CRCs etc. Do it now. */
131                 
132                 /* checked_ino is protected by the alloc_sem */
133                 if (c->checked_ino > c->highest_ino) {
134                         printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
135                                c->unchecked_size);
136                         D1(jffs2_dump_block_lists(c));
137                         spin_unlock(&c->erase_completion_lock);
138                         BUG();
139                 }
140
141                 spin_unlock(&c->erase_completion_lock);
142
143                 spin_lock(&c->inocache_lock);
144
145                 ic = jffs2_get_ino_cache(c, c->checked_ino++);
146
147                 if (!ic) {
148                         spin_unlock(&c->inocache_lock);
149                         continue;
150                 }
151
152                 if (!ic->nlink) {
153                         D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
154                                   ic->ino));
155                         spin_unlock(&c->inocache_lock);
156                         continue;
157                 }
158                 switch(ic->state) {
159                 case INO_STATE_CHECKEDABSENT:
160                 case INO_STATE_PRESENT:
161                         D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
162                         spin_unlock(&c->inocache_lock);
163                         continue;
164
165                 case INO_STATE_GC:
166                 case INO_STATE_CHECKING:
167                         printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
168                         spin_unlock(&c->inocache_lock);
169                         BUG();
170
171                 case INO_STATE_READING:
172                         /* We need to wait for it to finish, lest we move on
173                            and trigger the BUG() above while we haven't yet 
174                            finished checking all its nodes */
175                         D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
176                         up(&c->alloc_sem);
177                         sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
178                         return 0;
179
180                 default:
181                         BUG();
182
183                 case INO_STATE_UNCHECKED:
184                         ;
185                 }
186                 ic->state = INO_STATE_CHECKING;
187                 spin_unlock(&c->inocache_lock);
188
189                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%d\n", ic->ino));
190
191                 ret = jffs2_do_crccheck_inode(c, ic);
192                 if (ret)
193                         printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
194
195                 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
196                 up(&c->alloc_sem);
197                 return ret;
198         }
199
200         /* First, work out which block we're garbage-collecting */
201         jeb = c->gcblock;
202
203         if (!jeb)
204                 jeb = jffs2_find_gc_block(c);
205
206         if (!jeb) {
207                 printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n");
208                 spin_unlock(&c->erase_completion_lock);
209                 up(&c->alloc_sem);
210                 return -EIO;
211         }
212
213         D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
214         D1(if (c->nextblock)
215            printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
216
217         if (!jeb->used_size) {
218                 up(&c->alloc_sem);
219                 goto eraseit;
220         }
221
222         raw = jeb->gc_node;
223                         
224         while(ref_obsolete(raw)) {
225                 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
226                 jeb->gc_node = raw = raw->next_phys;
227                 if (!raw) {
228                         printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
229                         printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n", 
230                                jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
231                         spin_unlock(&c->erase_completion_lock);
232                         up(&c->alloc_sem);
233                         BUG();
234                 }
235         }
236         D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
237         if (!raw->next_in_ino) {
238                 /* Inode-less node. Clean marker, snapshot or something like that */
239                 /* FIXME: If it's something that needs to be copied, including something
240                    we don't grok that has JFFS2_NODETYPE_RWCOMPAT_COPY, we should do so */
241                 spin_unlock(&c->erase_completion_lock);
242                 jffs2_mark_node_obsolete(c, raw);
243                 up(&c->alloc_sem);
244                 goto eraseit_lock;
245         }
246                                                      
247         inum = jffs2_raw_ref_to_inum(raw);
248         D1(printk(KERN_DEBUG "Inode number is #%u\n", inum));
249
250         spin_unlock(&c->erase_completion_lock);
251
252         D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), inum));
253
254         /* Three possibilities:
255            1. Inode is already in-core. We must iget it and do proper
256               updating to its fragtree, etc.
257            2. Inode is not in-core, node is REF_PRISTINE. We lock the
258               inocache to prevent a read_inode(), copy the node intact.
259            3. Inode is not in-core, node is not pristine. We must iget()
260               and take the slow path.
261         */
262         spin_lock(&c->inocache_lock);
263         ic = jffs2_get_ino_cache(c, inum);
264
265         /* This should never fail unless I'm particularly stupid.
266            So we don't check before dereferencing it */
267
268         switch(ic->state) {
269         case INO_STATE_CHECKEDABSENT:
270                 /* It's been checked, but it's not currently in-core. 
271                    We can just copy any pristine nodes, but have
272                    to prevent anyone else from doing read_inode() while
273                    we're at it, so we set the state accordingly */
274                 if (ref_flags(raw) == REF_PRISTINE)
275                         ic->state = INO_STATE_GC;
276                 else {
277                         D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n", 
278                                   inum));
279                 }
280                 break;
281
282         case INO_STATE_PRESENT:
283         case INO_STATE_UNCHECKED:
284                 /* It's in-core or hasn't been checked. GC must iget() it. */
285                 break;
286
287         case INO_STATE_CHECKING:
288                 /* Should never happen. We should have finished checking
289                    by the time we actually start doing any GC. */
290                 BUG();
291
292         
293         case INO_STATE_GC:
294                 /* Should never happen. We are holding the alloc_sem, 
295                    no other garbage collection can happen. Note that we
296                    do depend on this later when deciding to do a simple
297                    node copy */
298                 BUG();
299                          
300         case INO_STATE_READING:
301                 /* Someone's currently trying to read it. We must wait for
302                    them to finish and then go through the full iget() route
303                    to do the GC. However, sometimes read_inode() needs to get
304                    the alloc_sem() (for marking nodes invalid) so we must
305                    drop the alloc_sem before sleeping. */
306
307                 up(&c->alloc_sem);
308                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
309                           inum, ic->state));
310                 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
311                 /* And because we dropped the alloc_sem we must start again from the 
312                    beginning. Ponder chance of livelock here -- we're returning success
313                    without actually making any progress.
314
315                    Q: What are the chances that the inode is back in INO_STATE_READING 
316                    again by the time we next enter this function? And that this happens
317                    enough times to cause a real delay?
318
319                    A: Small enough that I don't care :) 
320                 */
321                 return 0;
322
323         }
324
325         spin_unlock(&c->inocache_lock);
326
327         /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
328            node intact, and we don't have to muck about with the fragtree etc. 
329            because we know it's not in-core. If it _was_ in-core, we go through
330            all the iget() crap anyway */
331
332         if (ic->state == INO_STATE_GC) {
333                 ret = jffs2_garbage_collect_pristine(c, ic, raw);
334                 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
335
336                 if (ret != -EBADFD)
337                         goto release_sem;
338
339                 /* Fall through if it wanted us to */
340         }
341
342         ret = jffs2_garbage_collect_live(c, jeb, raw, ic);
343
344  release_sem:
345         up(&c->alloc_sem);
346
347  eraseit_lock:
348         /* If we've finished this block, start it erasing */
349         spin_lock(&c->erase_completion_lock);
350
351  eraseit:
352         if (c->gcblock && !c->gcblock->used_size) {
353                 D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
354                 /* We're GC'ing an empty block? */
355                 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
356                 c->gcblock = NULL;
357                 c->nr_erasing_blocks++;
358                 jffs2_erase_pending_trigger(c);
359         }
360         spin_unlock(&c->erase_completion_lock);
361
362         return ret;
363 }
364
365
366 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
367                                struct jffs2_raw_node_ref *raw, struct jffs2_inode_cache *ic)
368 {
369         struct jffs2_inode_info *f;
370         struct jffs2_node_frag *frag;
371         struct jffs2_full_dnode *fn = NULL;
372         struct jffs2_full_dirent *fd;
373         uint32_t start = 0, end = 0, nrfrags = 0;
374         struct inode *inode;
375         int ret = 0;
376
377         inode = iget(OFNI_BS_2SFFJ(c), ic->ino);
378         if (is_bad_inode(inode)) {
379                 printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u\n", ic->ino);
380                 /* NB. This will happen again. We need to do something appropriate here. */
381                 up(&c->alloc_sem);
382                 iput(inode);
383                 return -EIO;
384         }
385
386         f = JFFS2_INODE_INFO(inode);
387         down(&f->sem);
388
389         /* Now we have the lock for this inode. Check that it's still the one at the head
390            of the list. */
391
392         if (ref_obsolete(raw)) {
393                 D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
394                 /* They'll call again */
395                 goto upnout;
396         }
397         /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
398         if (f->metadata && f->metadata->raw == raw) {
399                 fn = f->metadata;
400                 ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
401                 goto upnout;
402         }
403
404         /* FIXME. Read node and do lookup? */
405         for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
406                 if (frag->node && frag->node->raw == raw) {
407                         fn = frag->node;
408                         end = frag->ofs + frag->size;
409 #if 1 /* Temporary debugging sanity checks, till we're ready to _trust_ the REF_PRISTINE flag stuff */ 
410                         if (!nrfrags && ref_flags(fn->raw) == REF_PRISTINE) {
411                                 if (fn->frags > 1) {
412                                         printk(KERN_WARNING "REF_PRISTINE node at 0x%08x had %d frags. Tell dwmw2\n", ref_offset(raw), fn->frags);
413                                         mark_ref_normal(raw);
414                                 }
415                                 /* A hole node which isn't multi-page should be garbage-collected
416                                    and merged anyway, so we just check for the frag size here,
417                                    rather than mucking around with actually reading the node
418                                    and checking the compression type, which is the real way
419                                    to tell a hole node. */
420                                 if (frag->ofs & (PAGE_CACHE_SIZE-1) && frag_prev(frag) && frag_prev(frag)->size < PAGE_CACHE_SIZE) {
421                                         printk(KERN_WARNING "REF_PRISTINE node at 0x%08x had a previous non-hole frag in the same page. Tell dwmw2\n",
422                                                ref_offset(raw));
423                                         mark_ref_normal(raw);
424                                 }
425
426                                 if ((frag->ofs+frag->size) & (PAGE_CACHE_SIZE-1) && frag_next(frag) && frag_next(frag)->size < PAGE_CACHE_SIZE) {
427                                         printk(KERN_WARNING "REF_PRISTINE node at 0x%08x (%08x-%08x) had a following non-hole frag in the same page. Tell dwmw2\n",
428                                                ref_offset(raw), frag->ofs, frag->ofs+frag->size);
429                                         mark_ref_normal(raw);
430                                 }
431                         }
432 #endif
433                         if (!nrfrags++)
434                                 start = frag->ofs;
435                         if (nrfrags == frag->node->frags)
436                                 break; /* We've found them all */
437                 }
438         }
439         if (fn) {
440                 if (ref_flags(raw) == REF_PRISTINE) {
441                         ret = jffs2_garbage_collect_pristine(c, ic, raw);
442                         if (!ret) {
443                                 /* Urgh. Return it sensibly. */
444                                 frag->node->raw = ic->nodes;
445                         }       
446                         if (ret != -EBADFD)
447                                 goto upnout;
448                 }
449                 /* We found a datanode. Do the GC */
450                 if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
451                         /* It crosses a page boundary. Therefore, it must be a hole. */
452                         ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
453                 } else {
454                         /* It could still be a hole. But we GC the page this way anyway */
455                         ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
456                 }
457                 goto upnout;
458         }
459         
460         /* Wasn't a dnode. Try dirent */
461         for (fd = f->dents; fd; fd=fd->next) {
462                 if (fd->raw == raw)
463                         break;
464         }
465
466         if (fd && fd->ino) {
467                 ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
468         } else if (fd) {
469                 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
470         } else {
471                 printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
472                        ref_offset(raw), f->inocache->ino);
473                 if (ref_obsolete(raw)) {
474                         printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
475                 } else {
476                         ret = -EIO;
477                 }
478         }
479  upnout:
480         up(&f->sem);
481         iput(inode);
482
483         return ret;
484 }
485
486 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c, 
487                                           struct jffs2_inode_cache *ic,
488                                           struct jffs2_raw_node_ref *raw)
489 {
490         union jffs2_node_union *node;
491         struct jffs2_raw_node_ref *nraw;
492         size_t retlen;
493         int ret;
494         uint32_t phys_ofs, alloclen;
495         uint32_t crc;
496         int retried = 0;
497
498         D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
499
500         /* Ask for a small amount of space (or the totlen if smaller) because we
501            don't want to force wastage of the end of a block if splitting would
502            work. */
503         ret = jffs2_reserve_space_gc(c, min_t(uint32_t, sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN, raw->totlen),
504                                      &phys_ofs, &alloclen);
505         if (ret)
506                 return ret;
507
508         if (alloclen < raw->totlen) {
509                 /* Doesn't fit untouched. We'll go the old route and split it */
510                 return -EBADFD;
511         }
512
513         node = kmalloc(raw->totlen, GFP_KERNEL);
514         if (!node)
515                return -ENOMEM;
516
517         ret = jffs2_flash_read(c, ref_offset(raw), raw->totlen, &retlen, (char *)node);
518         if (!ret && retlen != raw->totlen)
519                 ret = -EIO;
520         if (ret)
521                 goto out_node;
522
523         crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
524         if (je32_to_cpu(node->u.hdr_crc) != crc) {
525                 printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
526                        ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
527                 goto bail;
528         }
529
530         switch(je16_to_cpu(node->u.nodetype)) {
531         case JFFS2_NODETYPE_INODE:
532                 crc = crc32(0, node, sizeof(node->i)-8);
533                 if (je32_to_cpu(node->i.node_crc) != crc) {
534                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
535                                ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
536                         goto bail;
537                 }
538
539                 if (je32_to_cpu(node->i.dsize)) {
540                         crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
541                         if (je32_to_cpu(node->i.data_crc) != crc) {
542                                 printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
543                                        ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
544                                 goto bail;
545                         }
546                 }
547                 break;
548
549         case JFFS2_NODETYPE_DIRENT:
550                 crc = crc32(0, node, sizeof(node->d)-8);
551                 if (je32_to_cpu(node->d.node_crc) != crc) {
552                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
553                                ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
554                         goto bail;
555                 }
556
557                 if (node->d.nsize) {
558                         crc = crc32(0, node->d.name, node->d.nsize);
559                         if (je32_to_cpu(node->d.name_crc) != crc) {
560                                 printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent ode at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
561                                        ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
562                                 goto bail;
563                         }
564                 }
565                 break;
566         default:
567                 printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n", 
568                        ref_offset(raw), je16_to_cpu(node->u.nodetype));
569                 goto bail;
570         }
571
572         nraw = jffs2_alloc_raw_node_ref();
573         if (!nraw) {
574                 ret = -ENOMEM;
575                 goto out_node;
576         }
577
578         /* OK, all the CRCs are good; this node can just be copied as-is. */
579  retry:
580         nraw->flash_offset = phys_ofs;
581         nraw->totlen = raw->totlen;
582         nraw->next_phys = NULL;
583
584         ret = jffs2_flash_write(c, phys_ofs, raw->totlen, &retlen, (char *)node);
585
586         if (ret || (retlen != raw->totlen)) {
587                 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
588                        raw->totlen, phys_ofs, ret, retlen);
589                 if (retlen) {
590                         /* Doesn't belong to any inode */
591                         nraw->next_in_ino = NULL;
592
593                         nraw->flash_offset |= REF_OBSOLETE;
594                         jffs2_add_physical_node_ref(c, nraw);
595                         jffs2_mark_node_obsolete(c, nraw);
596                 } else {
597                         printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", nraw->flash_offset);
598                         jffs2_free_raw_node_ref(nraw);
599                 }
600                 if (!retried && (nraw == jffs2_alloc_raw_node_ref())) {
601                         /* Try to reallocate space and retry */
602                         uint32_t dummy;
603                         struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
604
605                         retried = 1;
606
607                         D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
608                         
609                         ACCT_SANITY_CHECK(c,jeb);
610                         D1(ACCT_PARANOIA_CHECK(jeb));
611
612                         ret = jffs2_reserve_space_gc(c, raw->totlen, &phys_ofs, &dummy);
613
614                         if (!ret) {
615                                 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
616
617                                 ACCT_SANITY_CHECK(c,jeb);
618                                 D1(ACCT_PARANOIA_CHECK(jeb));
619
620                                 goto retry;
621                         }
622                         D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
623                         jffs2_free_raw_node_ref(nraw);
624                 }
625
626                 if (!ret)
627                         ret = -EIO;
628                 goto out_node;
629         }
630         nraw->flash_offset |= REF_PRISTINE;
631         jffs2_add_physical_node_ref(c, nraw);
632
633         /* Link into per-inode list. This is safe because of the ic
634            state being INO_STATE_GC. Note that if we're doing this
635            for an inode which is in-code, the 'nraw' pointer is then
636            going to be fetched from ic->nodes by our caller. */
637         nraw->next_in_ino = ic->nodes;
638         ic->nodes = nraw;
639
640         jffs2_mark_node_obsolete(c, raw);
641         D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
642
643  out_node:
644         kfree(node);
645         return ret;
646  bail:
647         ret = -EBADFD;
648         goto out_node;
649 }
650
651 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 
652                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
653 {
654         struct jffs2_full_dnode *new_fn;
655         struct jffs2_raw_inode ri;
656         jint16_t dev;
657         char *mdata = NULL, mdatalen = 0;
658         uint32_t alloclen, phys_ofs;
659         int ret;
660
661         if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
662             S_ISCHR(JFFS2_F_I_MODE(f)) ) {
663                 /* For these, we don't actually need to read the old node */
664                 /* FIXME: for minor or major > 255. */
665                 dev = cpu_to_je16(((JFFS2_F_I_RDEV_MAJ(f) << 8) | 
666                         JFFS2_F_I_RDEV_MIN(f)));
667                 mdata = (char *)&dev;
668                 mdatalen = sizeof(dev);
669                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
670         } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
671                 mdatalen = fn->size;
672                 mdata = kmalloc(fn->size, GFP_KERNEL);
673                 if (!mdata) {
674                         printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
675                         return -ENOMEM;
676                 }
677                 ret = jffs2_read_dnode(c, fn, mdata, 0, mdatalen);
678                 if (ret) {
679                         printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
680                         kfree(mdata);
681                         return ret;
682                 }
683                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
684
685         }
686         
687         ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &phys_ofs, &alloclen);
688         if (ret) {
689                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
690                        sizeof(ri)+ mdatalen, ret);
691                 goto out;
692         }
693         
694         memset(&ri, 0, sizeof(ri));
695         ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
696         ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
697         ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
698         ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
699
700         ri.ino = cpu_to_je32(f->inocache->ino);
701         ri.version = cpu_to_je32(++f->highest_version);
702         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
703         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
704         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
705         ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
706         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
707         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
708         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
709         ri.offset = cpu_to_je32(0);
710         ri.csize = cpu_to_je32(mdatalen);
711         ri.dsize = cpu_to_je32(mdatalen);
712         ri.compr = JFFS2_COMPR_NONE;
713         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
714         ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
715
716         new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, phys_ofs, ALLOC_GC);
717
718         if (IS_ERR(new_fn)) {
719                 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
720                 ret = PTR_ERR(new_fn);
721                 goto out;
722         }
723         jffs2_mark_node_obsolete(c, fn->raw);
724         jffs2_free_full_dnode(fn);
725         f->metadata = new_fn;
726  out:
727         if (S_ISLNK(JFFS2_F_I_MODE(f)))
728                 kfree(mdata);
729         return ret;
730 }
731
732 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 
733                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
734 {
735         struct jffs2_full_dirent *new_fd;
736         struct jffs2_raw_dirent rd;
737         uint32_t alloclen, phys_ofs;
738         int ret;
739
740         rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
741         rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
742         rd.nsize = strlen(fd->name);
743         rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
744         rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
745
746         rd.pino = cpu_to_je32(f->inocache->ino);
747         rd.version = cpu_to_je32(++f->highest_version);
748         rd.ino = cpu_to_je32(fd->ino);
749         rd.mctime = cpu_to_je32(max(JFFS2_F_I_MTIME(f), JFFS2_F_I_CTIME(f)));
750         rd.type = fd->type;
751         rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
752         rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
753         
754         ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &phys_ofs, &alloclen);
755         if (ret) {
756                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
757                        sizeof(rd)+rd.nsize, ret);
758                 return ret;
759         }
760         new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, phys_ofs, ALLOC_GC);
761
762         if (IS_ERR(new_fd)) {
763                 printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
764                 return PTR_ERR(new_fd);
765         }
766         jffs2_add_fd_to_list(c, new_fd, &f->dents);
767         return 0;
768 }
769
770 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 
771                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
772 {
773         struct jffs2_full_dirent **fdp = &f->dents;
774         int found = 0;
775
776         /* On a medium where we can't actually mark nodes obsolete
777            pernamently, such as NAND flash, we need to work out
778            whether this deletion dirent is still needed to actively
779            delete a 'real' dirent with the same name that's still
780            somewhere else on the flash. */
781         if (!jffs2_can_mark_obsolete(c)) {
782                 struct jffs2_raw_dirent rd;
783                 struct jffs2_raw_node_ref *raw;
784                 int ret;
785                 size_t retlen;
786                 int name_len = strlen(fd->name);
787                 uint32_t name_crc = crc32(0, fd->name, name_len);
788                 char *namebuf = NULL;
789
790                 /* Prevent the erase code from nicking the obsolete node refs while
791                    we're looking at them. I really don't like this extra lock but
792                    can't see any alternative. Suggestions on a postcard to... */
793                 down(&c->erase_free_sem);
794
795                 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
796                         /* We only care about obsolete ones */
797                         if (!(ref_obsolete(raw)))
798                                 continue;
799
800                         /* Doesn't matter if there's one in the same erase block. We're going to 
801                            delete it too at the same time. */
802                         if ((raw->flash_offset & ~(c->sector_size-1)) ==
803                             (fd->raw->flash_offset & ~(c->sector_size-1)))
804                                 continue;
805
806                         /* This is an obsolete node belonging to the same directory */
807                         ret = jffs2_flash_read(c, ref_offset(raw), sizeof(struct jffs2_unknown_node), &retlen, (char *)&rd);
808                         if (ret) {
809                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading header from obsolete node at %08x\n", ret, ref_offset(raw));
810                                 /* If we can't read it, we don't need to continue to obsolete it. Continue */
811                                 continue;
812                         }
813                         if (retlen != sizeof(struct jffs2_unknown_node)) {
814                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %zd) reading header from obsolete node at %08x\n",
815                                        retlen, sizeof(struct jffs2_unknown_node), ref_offset(raw));
816                                 continue;
817                         }
818                         if (je16_to_cpu(rd.nodetype) != JFFS2_NODETYPE_DIRENT ||
819                             PAD(je32_to_cpu(rd.totlen)) != PAD(sizeof(rd) + name_len))
820                                 continue;
821
822                         /* OK, it's a dirent node, it's the right length. We have to take a 
823                            closer look at it... */
824                         ret = jffs2_flash_read(c, ref_offset(raw), sizeof(rd), &retlen, (char *)&rd);
825                         if (ret) {
826                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading from obsolete node at %08x\n", ret, ref_offset(raw));
827                                 /* If we can't read it, we don't need to continune to obsolete it. Continue */
828                                 continue;
829                         }
830                         if (retlen != sizeof(rd)) {
831                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %zd) reading from obsolete node at %08x\n",
832                                        retlen, sizeof(rd), ref_offset(raw));
833                                 continue;
834                         }
835
836                         /* If the name CRC doesn't match, skip */
837                         if (je32_to_cpu(rd.name_crc) != name_crc)
838                                 continue;
839                         /* If the name length doesn't match, or it's another deletion dirent, skip */
840                         if (rd.nsize != name_len || !je32_to_cpu(rd.ino))
841                                 continue;
842
843                         /* OK, check the actual name now */
844                         if (!namebuf) {
845                                 namebuf = kmalloc(name_len + 1, GFP_KERNEL);
846                                 if (!namebuf) {
847                                         up(&c->erase_free_sem);
848                                         return -ENOMEM;
849                                 }
850                         }
851                         /* We read the extra byte before it so it's a word-aligned read */
852                         ret = jffs2_flash_read(c, (ref_offset(raw))+sizeof(rd)-1, name_len+1, &retlen, namebuf);
853                         if (ret) {
854                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading name from obsolete node at %08x\n", ret, ref_offset(raw));
855                                 /* If we can't read it, we don't need to continune to obsolete it. Continue */
856                                 continue;
857                         }
858                         if (retlen != name_len+1) {
859                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %d) reading name from obsolete node at %08x\n",
860                                        retlen, name_len+1, ref_offset(raw));
861                                 continue;
862                         }
863                         if (memcmp(namebuf+1, fd->name, name_len))
864                                 continue;
865
866                         /* OK. The name really does match. There really is still an older node on
867                            the flash which our deletion dirent obsoletes. So we have to write out
868                            a new deletion dirent to replace it */
869                         
870                         if (namebuf)
871                                 kfree(namebuf);
872
873                         up(&c->erase_free_sem);
874                         return jffs2_garbage_collect_dirent(c, jeb, f, fd);
875                 }
876
877                 up(&c->erase_free_sem);
878
879                 if (namebuf) 
880                         kfree(namebuf);
881         }
882
883         /* No need for it any more. Just mark it obsolete and remove it from the list */
884         while (*fdp) {
885                 if ((*fdp) == fd) {
886                         found = 1;
887                         *fdp = fd->next;
888                         break;
889                 }
890                 fdp = &(*fdp)->next;
891         }
892         if (!found) {
893                 printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
894         }
895         jffs2_mark_node_obsolete(c, fd->raw);
896         jffs2_free_full_dirent(fd);
897         return 0;
898 }
899
900 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
901                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
902                                       uint32_t start, uint32_t end)
903 {
904         struct jffs2_raw_inode ri;
905         struct jffs2_node_frag *frag;
906         struct jffs2_full_dnode *new_fn;
907         uint32_t alloclen, phys_ofs;
908         int ret;
909
910         D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
911                   f->inocache->ino, start, end));
912         
913         memset(&ri, 0, sizeof(ri));
914
915         if(fn->frags > 1) {
916                 size_t readlen;
917                 uint32_t crc;
918                 /* It's partially obsoleted by a later write. So we have to 
919                    write it out again with the _same_ version as before */
920                 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
921                 if (readlen != sizeof(ri) || ret) {
922                         printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
923                         goto fill;
924                 }
925                 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
926                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
927                                ref_offset(fn->raw),
928                                je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
929                         return -EIO;
930                 }
931                 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
932                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
933                                ref_offset(fn->raw),
934                                je32_to_cpu(ri.totlen), sizeof(ri));
935                         return -EIO;
936                 }
937                 crc = crc32(0, &ri, sizeof(ri)-8);
938                 if (crc != je32_to_cpu(ri.node_crc)) {
939                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
940                                ref_offset(fn->raw), 
941                                je32_to_cpu(ri.node_crc), crc);
942                         /* FIXME: We could possibly deal with this by writing new holes for each frag */
943                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n", 
944                                start, end, f->inocache->ino);
945                         goto fill;
946                 }
947                 if (ri.compr != JFFS2_COMPR_ZERO) {
948                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
949                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n", 
950                                start, end, f->inocache->ino);
951                         goto fill;
952                 }
953         } else {
954         fill:
955                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
956                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
957                 ri.totlen = cpu_to_je32(sizeof(ri));
958                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
959
960                 ri.ino = cpu_to_je32(f->inocache->ino);
961                 ri.version = cpu_to_je32(++f->highest_version);
962                 ri.offset = cpu_to_je32(start);
963                 ri.dsize = cpu_to_je32(end - start);
964                 ri.csize = cpu_to_je32(0);
965                 ri.compr = JFFS2_COMPR_ZERO;
966         }
967         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
968         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
969         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
970         ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
971         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
972         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
973         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
974         ri.data_crc = cpu_to_je32(0);
975         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
976
977         ret = jffs2_reserve_space_gc(c, sizeof(ri), &phys_ofs, &alloclen);
978         if (ret) {
979                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
980                        sizeof(ri), ret);
981                 return ret;
982         }
983         new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, phys_ofs, ALLOC_GC);
984
985         if (IS_ERR(new_fn)) {
986                 printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
987                 return PTR_ERR(new_fn);
988         }
989         if (je32_to_cpu(ri.version) == f->highest_version) {
990                 jffs2_add_full_dnode_to_inode(c, f, new_fn);
991                 if (f->metadata) {
992                         jffs2_mark_node_obsolete(c, f->metadata->raw);
993                         jffs2_free_full_dnode(f->metadata);
994                         f->metadata = NULL;
995                 }
996                 return 0;
997         }
998
999         /* 
1000          * We should only get here in the case where the node we are
1001          * replacing had more than one frag, so we kept the same version
1002          * number as before. (Except in case of error -- see 'goto fill;' 
1003          * above.)
1004          */
1005         D1(if(unlikely(fn->frags <= 1)) {
1006                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1007                        fn->frags, je32_to_cpu(ri.version), f->highest_version,
1008                        je32_to_cpu(ri.ino));
1009         });
1010
1011         for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs); 
1012              frag; frag = frag_next(frag)) {
1013                 if (frag->ofs > fn->size + fn->ofs)
1014                         break;
1015                 if (frag->node == fn) {
1016                         frag->node = new_fn;
1017                         new_fn->frags++;
1018                         fn->frags--;
1019                 }
1020         }
1021         if (fn->frags) {
1022                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1023                 BUG();
1024         }
1025         if (!new_fn->frags) {
1026                 printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1027                 BUG();
1028         }
1029                 
1030         jffs2_mark_node_obsolete(c, fn->raw);
1031         jffs2_free_full_dnode(fn);
1032         
1033         return 0;
1034 }
1035
1036 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1037                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1038                                        uint32_t start, uint32_t end)
1039 {
1040         struct jffs2_full_dnode *new_fn;
1041         struct jffs2_raw_inode ri;
1042         uint32_t alloclen, phys_ofs, offset, orig_end, orig_start;      
1043         int ret = 0;
1044         unsigned char *comprbuf = NULL, *writebuf;
1045         struct page *pg;
1046         unsigned char *pg_ptr;
1047         /* FIXME: */ struct inode *inode = OFNI_EDONI_2SFFJ(f);
1048
1049         memset(&ri, 0, sizeof(ri));
1050
1051         D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1052                   f->inocache->ino, start, end));
1053
1054         orig_end = end;
1055         orig_start = start;
1056
1057         if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1058                 /* Attempt to do some merging. But only expand to cover logically
1059                    adjacent frags if the block containing them is already considered
1060                    to be dirty. Otherwise we end up with GC just going round in 
1061                    circles dirtying the nodes it already wrote out, especially 
1062                    on NAND where we have small eraseblocks and hence a much higher
1063                    chance of nodes having to be split to cross boundaries. */
1064
1065                 struct jffs2_node_frag *frag;
1066                 uint32_t min, max;
1067
1068                 min = start & ~(PAGE_CACHE_SIZE-1);
1069                 max = min + PAGE_CACHE_SIZE;
1070
1071                 frag = jffs2_lookup_node_frag(&f->fragtree, start);
1072
1073                 /* BUG_ON(!frag) but that'll happen anyway... */
1074
1075                 BUG_ON(frag->ofs != start);
1076
1077                 /* First grow down... */
1078                 while((frag = frag_prev(frag)) && frag->ofs >= min) {
1079
1080                         /* If the previous frag doesn't even reach the beginning, there's
1081                            excessive fragmentation. Just merge. */
1082                         if (frag->ofs > min) {
1083                                 D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1084                                           frag->ofs, frag->ofs+frag->size));
1085                                 start = frag->ofs;
1086                                 continue;
1087                         }
1088                         /* OK. This frag holds the first byte of the page. */
1089                         if (!frag->node || !frag->node->raw) {
1090                                 D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1091                                           frag->ofs, frag->ofs+frag->size));
1092                                 break;
1093                         } else {
1094
1095                                 /* OK, it's a frag which extends to the beginning of the page. Does it live 
1096                                    in a block which is still considered clean? If so, don't obsolete it.
1097                                    If not, cover it anyway. */
1098
1099                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1100                                 struct jffs2_eraseblock *jeb;
1101
1102                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1103
1104                                 if (jeb == c->gcblock) {
1105                                         D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1106                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1107                                         start = frag->ofs;
1108                                         break;
1109                                 }
1110                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1111                                         D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1112                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1113                                         break;
1114                                 }
1115
1116                                 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1117                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1118                                 start = frag->ofs;
1119                                 break;
1120                         }
1121                 }
1122
1123                 /* ... then up */
1124
1125                 /* Find last frag which is actually part of the node we're to GC. */
1126                 frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1127
1128                 while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1129
1130                         /* If the previous frag doesn't even reach the beginning, there's lots
1131                            of fragmentation. Just merge. */
1132                         if (frag->ofs+frag->size < max) {
1133                                 D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1134                                           frag->ofs, frag->ofs+frag->size));
1135                                 end = frag->ofs + frag->size;
1136                                 continue;
1137                         }
1138
1139                         if (!frag->node || !frag->node->raw) {
1140                                 D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1141                                           frag->ofs, frag->ofs+frag->size));
1142                                 break;
1143                         } else {
1144
1145                                 /* OK, it's a frag which extends to the beginning of the page. Does it live 
1146                                    in a block which is still considered clean? If so, don't obsolete it.
1147                                    If not, cover it anyway. */
1148
1149                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1150                                 struct jffs2_eraseblock *jeb;
1151
1152                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1153
1154                                 if (jeb == c->gcblock) {
1155                                         D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1156                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1157                                         end = frag->ofs + frag->size;
1158                                         break;
1159                                 }
1160                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1161                                         D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1162                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1163                                         break;
1164                                 }
1165
1166                                 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1167                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1168                                 end = frag->ofs + frag->size;
1169                                 break;
1170                         }
1171                 }
1172                 D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n", 
1173                           orig_start, orig_end, start, end));
1174
1175                 BUG_ON(end > JFFS2_F_I_SIZE(f));
1176                 BUG_ON(end < orig_end);
1177                 BUG_ON(start > orig_start);
1178         }
1179         
1180         /* First, use readpage() to read the appropriate page into the page cache */
1181         /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1182          *    triggered garbage collection in the first place?
1183          * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1184          *    page OK. We'll actually write it out again in commit_write, which is a little
1185          *    suboptimal, but at least we're correct.
1186          */
1187 #ifdef __ECOS
1188         pg = read_cache_page(start >> PAGE_CACHE_SHIFT, (void *)jffs2_do_readpage_unlock, inode);
1189 #else
1190         pg = read_cache_page(inode->i_mapping, start >> PAGE_CACHE_SHIFT, (void *)jffs2_do_readpage_unlock, inode);
1191 #endif
1192         if (IS_ERR(pg)) {
1193                 printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg));
1194                 return PTR_ERR(pg);
1195         }
1196         pg_ptr = (char *)kmap(pg);
1197         comprbuf = kmalloc(end - start, GFP_KERNEL);
1198
1199         offset = start;
1200         while(offset < orig_end) {
1201                 uint32_t datalen;
1202                 uint32_t cdatalen;
1203                 char comprtype = JFFS2_COMPR_NONE;
1204
1205                 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN, &phys_ofs, &alloclen);
1206
1207                 if (ret) {
1208                         printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1209                                sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1210                         break;
1211                 }
1212                 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1213                 datalen = end - offset;
1214
1215                 writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1216
1217                 if (comprbuf) {
1218                         comprtype = jffs2_compress(writebuf, comprbuf, &datalen, &cdatalen);
1219                 }
1220                 if (comprtype) {
1221                         writebuf = comprbuf;
1222                 } else {
1223                         datalen = cdatalen;
1224                 }
1225                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1226                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1227                 ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1228                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1229
1230                 ri.ino = cpu_to_je32(f->inocache->ino);
1231                 ri.version = cpu_to_je32(++f->highest_version);
1232                 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1233                 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1234                 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1235                 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1236                 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1237                 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1238                 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1239                 ri.offset = cpu_to_je32(offset);
1240                 ri.csize = cpu_to_je32(cdatalen);
1241                 ri.dsize = cpu_to_je32(datalen);
1242                 ri.compr = comprtype;
1243                 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1244                 ri.data_crc = cpu_to_je32(crc32(0, writebuf, cdatalen));
1245         
1246                 new_fn = jffs2_write_dnode(c, f, &ri, writebuf, cdatalen, phys_ofs, ALLOC_GC);
1247
1248                 if (IS_ERR(new_fn)) {
1249                         printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1250                         ret = PTR_ERR(new_fn);
1251                         break;
1252                 }
1253                 ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1254                 offset += datalen;
1255                 if (f->metadata) {
1256                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1257                         jffs2_free_full_dnode(f->metadata);
1258                         f->metadata = NULL;
1259                 }
1260         }
1261         if (comprbuf) kfree(comprbuf);
1262
1263         kunmap(pg);
1264         /* XXX: Does the page get freed automatically? */
1265         /* AAA: Judging by the unmount getting stuck in __wait_on_page, nope. */
1266         page_cache_release(pg);
1267         return ret;
1268 }
1269