/* cf-interface.c: CacheFiles to FS-Cache interface
 *
 * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/statfs.h>
#include <linux/buffer_head.h>
#include "internal.h"

#define list_to_page(head) (list_entry((head)->prev, struct page, lru))
#define log2(n) ffz(~(n))
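/* list_to_page() yields the page at the tail of an LRU-linked list; log2()
 * relies on n being a power of two: the lowest zero bit of ~n is the lowest
 * set bit of n, which ffz() locates */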

/*****************************************************************************/
/*
 * look up the nominated node in this cache, creating it if necessary
 */
static struct fscache_object *cachefiles_lookup_object(
	struct fscache_cache *_cache,
	struct fscache_object *_parent,
	struct fscache_cookie *cookie)
{
	struct cachefiles_object *parent, *object;
	struct cachefiles_cache *cache;
	struct cachefiles_xattr *auxdata;
	unsigned keylen, auxlen;
	void *buffer;
	char *key;
	int ret;

	ASSERT(_parent);

	cache = container_of(_cache, struct cachefiles_cache, cache);
	parent = container_of(_parent, struct cachefiles_object, fscache);

	_enter("{%s},%p,%p", cache->cache.identifier, parent, cookie);

	/* create a new object record and a temporary leaf image */
	object = kmem_cache_alloc(cachefiles_object_jar, SLAB_KERNEL);
	if (!object)
		goto nomem_object;

	atomic_set(&object->usage, 1);
	atomic_set(&object->fscache_usage, 1);

	fscache_object_init(&object->fscache);
	object->fscache.cookie = cookie;
	object->fscache.cache = parent->fscache.cache;

	object->type = cookie->def->type;

	/* get hold of the raw key
	 * - stick the length on the front and leave space on the back for the
	 *   encoder
	 */
	buffer = kmalloc((2 + 512) + 3, GFP_KERNEL);
	if (!buffer)
		goto nomem_buffer;

	keylen = cookie->def->get_key(cookie->netfs_data, buffer + 2, 512);
	ASSERTCMP(keylen, <, 512);

	*(uint16_t *)buffer = keylen;
	((char *)buffer)[keylen + 2] = 0;
	((char *)buffer)[keylen + 3] = 0;
	((char *)buffer)[keylen + 4] = 0;
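	/* the buffer now holds [u16 keylen][keylen bytes of key][3 NUL bytes]
	 * - the trailing NULs being the spare space left for the encoder */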

	/* turn the raw key into something we can work with as a filename */
	key = cachefiles_cook_key(buffer, keylen + 2, object->type);
	if (!key)
		goto nomem_key;

	/* get hold of the auxiliary data and prepend the object type */
	auxdata = buffer;
	auxlen = 0;
	if (cookie->def->get_aux) {
		auxlen = cookie->def->get_aux(cookie->netfs_data,
					      auxdata->data, 511);
		ASSERTCMP(auxlen, <, 511);
	}

	auxdata->len = auxlen + 1;
	auxdata->type = cookie->def->type;

	/* look up the key, creating any missing bits */
	ret = cachefiles_walk_to_object(parent, object, key, auxdata);
	if (ret < 0)
		goto lookup_failed;

	kfree(buffer);
	kfree(key);
	_leave(" = %p", &object->fscache);
	return &object->fscache;

lookup_failed:
	kmem_cache_free(cachefiles_object_jar, object);
	kfree(buffer);
	kfree(key);
	_leave(" = %d", ret);
	return ERR_PTR(ret);

nomem_key:
	kfree(buffer);
nomem_buffer:
	kmem_cache_free(cachefiles_object_jar, object);
nomem_object:
	_leave(" = -ENOMEM");
	return ERR_PTR(-ENOMEM);

}

/*****************************************************************************/
/*
 * increment the usage count on an object (may fail if unmounting)
 */
static struct fscache_object *cachefiles_grab_object(struct fscache_object *_object)
{
	struct cachefiles_object *object;

	_enter("%p", _object);

	object = container_of(_object, struct cachefiles_object, fscache);

#ifdef CACHEFILES_DEBUG_SLAB
	ASSERT((atomic_read(&object->fscache_usage) & 0xffff0000) != 0x6b6b0000);
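	/* 0x6b is the slab POISON_FREE pattern: tripping this assertion means
	 * the object has already been freed and poisoned */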
#endif

	atomic_inc(&object->fscache_usage);
	return &object->fscache;

}

/*****************************************************************************/
/*
 * lock the semaphore on an object
 */
static void cachefiles_lock_object(struct fscache_object *_object)
{
	struct cachefiles_object *object;

	_enter("%p", _object);

	object = container_of(_object, struct cachefiles_object, fscache);

#ifdef CACHEFILES_DEBUG_SLAB
	ASSERT((atomic_read(&object->fscache_usage) & 0xffff0000) != 0x6b6b0000);
#endif

	down_write(&object->sem);

}

/*****************************************************************************/
/*
 * unlock the semaphore on an object
 */
static void cachefiles_unlock_object(struct fscache_object *_object)
{
	struct cachefiles_object *object;

	_enter("%p", _object);

	object = container_of(_object, struct cachefiles_object, fscache);
	up_write(&object->sem);

}

/*****************************************************************************/
/*
 * update the auxiliary data for an object on disk
 */
static void cachefiles_update_object(struct fscache_object *_object)
{
	struct cachefiles_object *object;
	struct cachefiles_cache *cache;

	_enter("%p", _object);

	object = container_of(_object, struct cachefiles_object, fscache);
	cache = container_of(object->fscache.cache, struct cachefiles_cache, cache);

	//cachefiles_tree_update_object(super, object);

}

/*****************************************************************************/
/*
 * dispose of a reference to an object
 */
static void cachefiles_put_object(struct fscache_object *_object)
{
	struct cachefiles_object *object;
	struct cachefiles_cache *cache;

	ASSERT(_object);

	object = container_of(_object, struct cachefiles_object, fscache);
	_enter("%p{%d}", object, atomic_read(&object->usage));

	ASSERT(object);

	cache = container_of(object->fscache.cache,
			     struct cachefiles_cache, cache);

#ifdef CACHEFILES_DEBUG_SLAB
	ASSERT((atomic_read(&object->fscache_usage) & 0xffff0000) != 0x6b6b0000);
#endif

	if (!atomic_dec_and_test(&object->fscache_usage))
		return;

	_debug("- kill object %p", object);

	/* delete retired objects */
	if (test_bit(FSCACHE_OBJECT_RECYCLING, &object->fscache.flags) &&
	    _object != cache->cache.fsdef
	    ) {
		_debug("- retire object %p", object);
		cachefiles_delete_object(cache, object);
	}

	/* close the filesystem stuff attached to the object */
	if (object->backer != object->dentry) {
		dput(object->backer);
		object->backer = NULL;
	}

	/* note that an object is now inactive */
	write_lock(&cache->active_lock);
	rb_erase(&object->active_node, &cache->active_nodes);
	write_unlock(&cache->active_lock);

	dput(object->dentry);
	object->dentry = NULL;

	/* then dispose of the object */
	kmem_cache_free(cachefiles_object_jar, object);

	_leave("");

}

/*****************************************************************************/
/*
 * sync a cache
 */
static void cachefiles_sync_cache(struct fscache_cache *_cache)
{
	struct cachefiles_cache *cache;
	int ret;

	_enter("%p", _cache);

	cache = container_of(_cache, struct cachefiles_cache, cache);

	/* make sure all pages pinned by operations on behalf of the netfs are
	 * written to disc */
	ret = fsync_super(cache->mnt->mnt_sb);
	if (ret == -EIO)
		cachefiles_io_error(cache,
				    "Attempt to sync backing fs superblock"
				    " returned error %d",
				    ret);

}

/*****************************************************************************/
/*
 * set the data size on an object
 */
static int cachefiles_set_i_size(struct fscache_object *_object, loff_t i_size)
{
	struct cachefiles_object *object;
	struct iattr newattrs;
	int ret;

	_enter("%p,%llu", _object, i_size);

	object = container_of(_object, struct cachefiles_object, fscache);

	if (i_size == object->i_size)
		return 0;

	if (!object->backer)
		return -ENOBUFS;

	ASSERT(S_ISREG(object->backer->d_inode->i_mode));

	newattrs.ia_size = i_size;
	newattrs.ia_valid = ATTR_SIZE;

	mutex_lock(&object->backer->d_inode->i_mutex);
	ret = notify_change(object->backer, &newattrs);
	mutex_unlock(&object->backer->d_inode->i_mutex);

	if (ret == -EIO) {
		cachefiles_io_error_obj(object, "Size set failed");
		ret = -ENOBUFS;
	}

	_leave(" = %d", ret);
	return ret;

}

/*****************************************************************************/
/*
 * see if we have space for a number of pages in the cache
 */
int cachefiles_has_space(struct cachefiles_cache *cache, unsigned nr)
{
	struct kstatfs stats;
	int ret;

	_enter("{%llu,%llu,%llu},%d",
	       cache->brun, cache->bcull, cache->bstop, nr);
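
	/* brun, bcull and bstop are the culling thresholds, in blocks:
	 * culling ceases whilst free space is above brun, begins when it
	 * drops below bcull, and below bstop allocation is refused outright */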

	/* find out how many pages of blockdev are available */
	memset(&stats, 0, sizeof(stats));

	ret = cache->mnt->mnt_sb->s_op->statfs(cache->mnt->mnt_root, &stats);
	if (ret < 0) {
		if (ret == -EIO)
			cachefiles_io_error(cache, "statfs failed");
		return ret;
	}

	stats.f_bavail >>= cache->bshift;

	_debug("avail %llu", stats.f_bavail);

	/* see if there is sufficient space */
	stats.f_bavail -= nr;

	ret = -ENOBUFS;
	if (stats.f_bavail < cache->bstop)
		goto begin_cull;

	ret = 0;
	if (stats.f_bavail < cache->bcull)
		goto begin_cull;

	if (test_bit(CACHEFILES_CULLING, &cache->flags) &&
	    stats.f_bavail >= cache->brun
	    ) {
		if (test_and_clear_bit(CACHEFILES_CULLING, &cache->flags)) {
			_debug("cease culling");
			send_sigurg(&cache->cachefilesd->f_owner);
		}
	}

	_leave(" = 0");
	return 0;

begin_cull:
	if (!test_and_set_bit(CACHEFILES_CULLING, &cache->flags)) {
		_debug("### CULL CACHE ###");
		send_sigurg(&cache->cachefilesd->f_owner);
	}

	_leave(" = %d", ret);
	return ret;

}

/*****************************************************************************/
/*
 * waiter function invoked when a page being read from a backing file is
 * unlocked
 */
static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
				  int sync, void *_key)
{
	struct cachefiles_one_read *monitor =
		container_of(wait, struct cachefiles_one_read, monitor);
	struct wait_bit_key *key = _key;
	struct page *page = wait->private;

	ASSERT(key);

	_enter("{%lu},%u,%d,{%p,%u}",
	       monitor->netfs_page->index, mode, sync,
	       key->flags, key->bit_nr);

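	/* we only care about this page's PG_locked bit being cleared; ignore
	 * wakeups for any other bit or page */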
	if (key->flags != &page->flags ||
	    key->bit_nr != PG_locked)
		return 0;

	_debug("--- monitor %p %lx ---", page, page->flags);

	if (!PageUptodate(page) && !PageError(page))
		dump_stack();

	/* remove from the waitqueue */
	list_del(&wait->task_list);

	/* move onto the action list and queue for keventd */
	ASSERT(monitor->object);

	spin_lock(&monitor->object->work_lock);
	list_move(&monitor->obj_link, &monitor->object->read_list);
	spin_unlock(&monitor->object->work_lock);

	schedule_work(&monitor->object->read_work);

	return 0;

}

/*****************************************************************************/
/*
 * let keventd drive the copying of pages
 */
void cachefiles_read_copier_work(void *_object)
{
	struct cachefiles_one_read *monitor;
	struct cachefiles_object *object = _object;
	struct fscache_cookie *cookie = object->fscache.cookie;
	struct pagevec pagevec;
	int error, max;

	_enter("{ino=%lu}", object->backer->d_inode->i_ino);

	pagevec_init(&pagevec, 0);

	max = 8;
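	/* copy at most eight pages per pass so keventd isn't hogged */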
	spin_lock_irq(&object->work_lock);

	while (!list_empty(&object->read_list)) {
		monitor = list_entry(object->read_list.next,
				     struct cachefiles_one_read, obj_link);
		list_del(&monitor->obj_link);

		spin_unlock_irq(&object->work_lock);

		_debug("- copy {%lu}", monitor->back_page->index);

		error = -EIO;
		if (PageUptodate(monitor->back_page)) {
			copy_highpage(monitor->netfs_page, monitor->back_page);

			pagevec_add(&pagevec, monitor->netfs_page);
			cookie->def->mark_pages_cached(
				cookie->netfs_data,
				monitor->netfs_page->mapping,
				&pagevec);
			pagevec_reinit(&pagevec);

			error = 0;
		}

		if (error)
			cachefiles_io_error_obj(
				object,
				"readpage failed on backing file %lx",
				(unsigned long) monitor->back_page->flags);

		page_cache_release(monitor->back_page);

		monitor->end_io_func(monitor->netfs_page,
				     monitor->context,
				     error);

		page_cache_release(monitor->netfs_page);
		fscache_put_context(cookie, monitor->context);
		kfree(monitor);

		/* let keventd have some air occasionally */
		max--;
		if (max < 0 || need_resched()) {
			if (!list_empty(&object->read_list))
				schedule_work(&object->read_work);
			_leave(" [maxed out]");
			return;
		}

		spin_lock_irq(&object->work_lock);
	}

	spin_unlock_irq(&object->work_lock);

	_leave("");

}

/*****************************************************************************/
/*
 * read the corresponding page from the backing file
 * - an uncertain page is simply discarded, to be tried again another time
 */
static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
					    fscache_rw_complete_t end_io_func,
					    void *context,
					    struct page *netpage,
					    struct pagevec *lru_pvec)
{
	struct cachefiles_one_read *monitor;
	struct address_space *bmapping;
	struct page *newpage, *backpage;
	int ret;

	_enter("");

	ASSERTCMP(pagevec_count(lru_pvec), ==, 0);
	pagevec_reinit(lru_pvec);

	_debug("read back %p{%lu,%d}",
	       netpage, netpage->index, page_count(netpage));

	monitor = kzalloc(sizeof(*monitor), GFP_KERNEL);
	if (!monitor)
		goto nomem;

	monitor->netfs_page = netpage;
	monitor->object = object;
	monitor->end_io_func = end_io_func;
	monitor->context = fscache_get_context(object->fscache.cookie,
					       context);

	init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter);

	/* attempt to get hold of the backing page */
	bmapping = object->backer->d_inode->i_mapping;
	newpage = NULL;

	for (;;) {
		backpage = find_get_page(bmapping, netpage->index);
		if (backpage)
			goto backing_page_already_present;

		if (!newpage) {
			newpage = page_cache_alloc_cold(bmapping);
			if (!newpage)
				goto nomem_monitor;
		}

		ret = add_to_page_cache(newpage, bmapping,
					netpage->index, GFP_KERNEL);
		if (ret == 0)
			goto installed_new_backing_page;
		if (ret != -EEXIST)
			goto nomem_page;
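		/* -EEXIST means someone else installed a page whilst we
		 * weren't looking: go round again and pick theirs up */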
	}

	/* we've installed a new backing page, so now we need to add it
	 * to the LRU list and start it reading */
installed_new_backing_page:
	_debug("- new %p", newpage);

	backpage = newpage;
	newpage = NULL;

	page_cache_get(backpage);
	pagevec_add(lru_pvec, backpage);
	__pagevec_lru_add(lru_pvec);

	ret = bmapping->a_ops->readpage(NULL, backpage);
	if (ret < 0)
		goto read_error;

	/* set the monitor to transfer the data across */
monitor_backing_page:
	_debug("- monitor add");

	/* install the monitor */
	page_cache_get(monitor->netfs_page);
	page_cache_get(backpage);
	monitor->back_page = backpage;

	spin_lock_irq(&object->work_lock);
	list_add_tail(&monitor->obj_link, &object->read_pend_list);
	spin_unlock_irq(&object->work_lock);

	monitor->monitor.private = backpage;
	install_page_waitqueue_monitor(backpage, &monitor->monitor);
	monitor = NULL;

	/* but the page may have been read before the monitor was
	 * installed, so the monitor may miss the event - so we have to
	 * ensure that we do get one in such a case */
	if (!TestSetPageLocked(backpage))
		unlock_page(backpage);
	goto success;

	/* if the backing page is already present, it can be in one of
	 * three states: read in progress, read failed or read okay */
backing_page_already_present:
	_debug("- present");

	if (newpage) {
		page_cache_release(newpage);
		newpage = NULL;
	}

	if (PageError(backpage))
		goto io_error;

	if (PageUptodate(backpage))
		goto backing_page_already_uptodate;

	goto monitor_backing_page;

	/* the backing page is already up to date, attach the netfs
	 * page to the pagecache and LRU and copy the data across */
backing_page_already_uptodate:
	_debug("- uptodate");

	copy_highpage(netpage, backpage);
	end_io_func(netpage, context, 0);

success:
	_debug("success");
	ret = 0;

out:
	if (backpage)
		page_cache_release(backpage);
	if (monitor) {
		fscache_put_context(object->fscache.cookie, monitor->context);
		kfree(monitor);
	}

	_leave(" = %d", ret);
	return ret;

read_error:
	_debug("read error %d", ret);
	if (ret == -ENOMEM)
		goto out;
io_error:
	cachefiles_io_error_obj(object, "page read error on backing file");
	ret = -EIO;
	goto out;

nomem_page:
	page_cache_release(newpage);
nomem_monitor:
	fscache_put_context(object->fscache.cookie, monitor->context);
	kfree(monitor);
nomem:
	_leave(" = -ENOMEM");
	return -ENOMEM;

}

/*****************************************************************************/
/*
 * read a page from the cache or allocate a block in which to store it
 * - cache withdrawal is prevented by the caller
 * - returns -EINTR if interrupted
 * - returns -ENOMEM if we ran out of memory
 * - returns -ENOBUFS if no buffers can be made available
 * - returns -ENOBUFS if page is beyond EOF
 * - if the page is backed by a block in the cache:
 *   - a read will be started which will call the callback on completion
 *   - 0 will be returned
 * - else if the page is unbacked:
 *   - the metadata will be retained
 *   - -ENODATA will be returned
 */
static int cachefiles_read_or_alloc_page(struct fscache_object *_object,
					 struct page *page,
					 fscache_rw_complete_t end_io_func,
					 void *context,
					 unsigned long gfp)
{
	struct cachefiles_object *object;
	struct cachefiles_cache *cache;
	struct fscache_cookie *cookie;
	struct pagevec pagevec;
	struct inode *inode;
	sector_t block0, block;
	unsigned shift;
	int ret;

	object = container_of(_object, struct cachefiles_object, fscache);
	cache = container_of(object->fscache.cache, struct cachefiles_cache, cache);

	_enter("{%p},{%lx},,,", object, page->index);

	if (!object->backer)
		return -ENOBUFS;

	inode = object->backer->d_inode;
	ASSERT(S_ISREG(inode->i_mode));
	ASSERT(inode->i_mapping->a_ops->bmap);
	ASSERT(inode->i_mapping->a_ops->readpages);

	/* calculate the shift required to use bmap */
	if (inode->i_sb->s_blocksize > PAGE_SIZE)
		return -ENOBUFS;

	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
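	/* shifting a page index left by this many bits gives the index of
	 * the first fs block on that page */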

	cookie = object->fscache.cookie;

	pagevec_init(&pagevec, 0);

	/* we assume the absence or presence of the first block is a good
	 * enough indication for the page as a whole
	 * - TODO: don't use bmap() for this as it is _not_ actually good
	 *   enough for this as it doesn't indicate errors, but it's all we've
	 *   got for the moment
	 */
	block0 = page->index;
	block0 <<= shift;

	block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0);
	_debug("%llx -> %llx", block0, block);

	if (block) {
		/* submit the apparently valid page to the backing fs to be
		 * read from disk */
		ret = cachefiles_read_backing_file_one(object,
						       end_io_func,
						       context,
						       page,
						       &pagevec);
	} else if (cachefiles_has_space(cache, 1) == 0) {
		/* there's space in the cache we can use */
		pagevec_add(&pagevec, page);
		cookie->def->mark_pages_cached(cookie->netfs_data,
					       page->mapping, &pagevec);
		ret = -ENODATA;
	} else {
		ret = -ENOBUFS;
	}

	_leave(" = %d", ret);
	return ret;

}

/*****************************************************************************/
/*
 * read the corresponding pages from the backing file
 * - any uncertain pages are simply discarded, to be tried again another time
 */
static int cachefiles_read_backing_file(struct cachefiles_object *object,
					fscache_rw_complete_t end_io_func,
					void *context,
					struct address_space *mapping,
					struct list_head *list,
					struct pagevec *lru_pvec)
{
	struct cachefiles_one_read *monitor = NULL;
	struct address_space *bmapping = object->backer->d_inode->i_mapping;
	struct page *newpage = NULL, *netpage, *_n, *backpage = NULL;
	int ret = 0;

	_enter("");

	ASSERTCMP(pagevec_count(lru_pvec), ==, 0);
	pagevec_reinit(lru_pvec);

	list_for_each_entry_safe(netpage, _n, list, lru) {
		list_del(&netpage->lru);

		_debug("read back %p{%lu,%d}",
		       netpage, netpage->index, page_count(netpage));

		if (!monitor) {
			monitor = kzalloc(sizeof(*monitor), GFP_KERNEL);
			if (!monitor)
				goto nomem;

			monitor->object = object;
			monitor->end_io_func = end_io_func;
			monitor->context = fscache_get_context(
				object->fscache.cookie, context);

			init_waitqueue_func_entry(&monitor->monitor,
						  cachefiles_read_waiter);
		}

		for (;;) {
			backpage = find_get_page(bmapping, netpage->index);
			if (backpage)
				goto backing_page_already_present;

			if (!newpage) {
				newpage = page_cache_alloc_cold(bmapping);
				if (!newpage)
					goto nomem;
			}

			ret = add_to_page_cache(newpage, bmapping,
						netpage->index, GFP_KERNEL);
			if (ret == 0)
				goto installed_new_backing_page;
			if (ret != -EEXIST)
				goto nomem;
		}

		/* we've installed a new backing page, so now we need to add it
		 * to the LRU list and start it reading */
	installed_new_backing_page:
		_debug("- new %p", newpage);

		backpage = newpage;
		newpage = NULL;

		page_cache_get(backpage);
		if (!pagevec_add(lru_pvec, backpage))
			__pagevec_lru_add(lru_pvec);
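		/* pagevec_add() returns the number of slots remaining, so a
		 * zero return means the vec is full and must be flushed to
		 * the LRU */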

	reread_backing_page:
		ret = bmapping->a_ops->readpage(NULL, backpage);
		if (ret < 0)
			goto read_error;

		/* add the netfs page to the pagecache and LRU, and set the
		 * monitor to transfer the data across */
	monitor_backing_page:
		_debug("- monitor add");

		ret = add_to_page_cache(netpage, mapping, netpage->index,
					GFP_KERNEL);
		if (ret < 0) {
			if (ret == -EEXIST) {
				page_cache_release(netpage);
				continue;
			}
			goto nomem;
		}

		page_cache_get(netpage);
		if (!pagevec_add(lru_pvec, netpage))
			__pagevec_lru_add(lru_pvec);

		/* install a monitor */
		page_cache_get(netpage);
		monitor->netfs_page = netpage;

		page_cache_get(backpage);
		monitor->back_page = backpage;

		spin_lock_irq(&object->work_lock);
		list_add_tail(&monitor->obj_link, &object->read_pend_list);
		spin_unlock_irq(&object->work_lock);

		monitor->monitor.private = backpage;
		install_page_waitqueue_monitor(backpage, &monitor->monitor);
		monitor = NULL;

		/* but the page may have been read before the monitor was
		 * installed, so the monitor may miss the event - so we have to
		 * ensure that we do get one in such a case */
		if (!TestSetPageLocked(backpage)) {
			_debug("2unlock %p", backpage);
			unlock_page(backpage);
		}

		page_cache_release(backpage);
		backpage = NULL;

		page_cache_release(netpage);
		netpage = NULL;
		continue;

		/* if the backing page is already present, it can be in one of
		 * three states: read in progress, read failed or read okay */
	backing_page_already_present:
		_debug("- present %p", backpage);

		if (PageError(backpage))
			goto io_error;

		if (PageUptodate(backpage))
			goto backing_page_already_uptodate;

		_debug("- not ready %p{%lx}", backpage, backpage->flags);

		if (TestSetPageLocked(backpage))
			goto monitor_backing_page;

		if (PageError(backpage)) {
			unlock_page(backpage);
			goto io_error;
		}

		if (PageUptodate(backpage))
			goto backing_page_already_uptodate_unlock;

		/* we've locked a page that's neither up to date nor erroneous,
		 * so we need to attempt to read it again */
		goto reread_backing_page;

		/* the backing page is already up to date, attach the netfs
		 * page to the pagecache and LRU and copy the data across */
	backing_page_already_uptodate_unlock:
		unlock_page(backpage);
	backing_page_already_uptodate:
		_debug("- uptodate");

		ret = add_to_page_cache(netpage, mapping, netpage->index,
					GFP_KERNEL);
		if (ret < 0) {
			if (ret == -EEXIST) {
				page_cache_release(netpage);
				continue;
			}
			goto nomem;
		}

		copy_highpage(netpage, backpage);

		page_cache_release(backpage);
		backpage = NULL;

		page_cache_get(netpage);
		if (!pagevec_add(lru_pvec, netpage))
			__pagevec_lru_add(lru_pvec);

		end_io_func(netpage, context, 0);

		page_cache_release(netpage);
		netpage = NULL;
		continue;
	}

	netpage = NULL;

	_debug("out");

out:
	/* tidy up */
	pagevec_lru_add(lru_pvec);

	if (newpage)
		page_cache_release(newpage);
	if (netpage)
		page_cache_release(netpage);
	if (backpage)
		page_cache_release(backpage);
	if (monitor) {
		fscache_put_context(object->fscache.cookie, monitor->context);
		kfree(monitor);
	}

	list_for_each_entry_safe(netpage, _n, list, lru) {
		list_del(&netpage->lru);
		page_cache_release(netpage);
	}

	_leave(" = %d", ret);
	return ret;

nomem:
	_debug("nomem");
	ret = -ENOMEM;
	goto out;

read_error:
	_debug("read error %d", ret);
	if (ret == -ENOMEM)
		goto out;
io_error:
	cachefiles_io_error_obj(object, "page read error on backing file");
	ret = -EIO;
	goto out;

}

/*****************************************************************************/
/*
 * read a list of pages from the cache or allocate blocks in which to store
 * them
 */
static int cachefiles_read_or_alloc_pages(struct fscache_object *_object,
					  struct address_space *mapping,
					  struct list_head *pages,
					  unsigned *nr_pages,
					  fscache_rw_complete_t end_io_func,
					  void *context,
					  unsigned long gfp)
{
	struct cachefiles_object *object;
	struct cachefiles_cache *cache;
	struct fscache_cookie *cookie;
	struct list_head backpages;
	struct pagevec pagevec;
	struct inode *inode;
	struct page *page, *_n;
	unsigned shift, nrbackpages;
	int ret, ret2, space;

	object = container_of(_object, struct cachefiles_object, fscache);
	cache = container_of(object->fscache.cache, struct cachefiles_cache, cache);

	_enter("{%p},,%d,,", object, *nr_pages);

	if (!object->backer)
		return -ENOBUFS;

	space = 1;
	if (cachefiles_has_space(cache, *nr_pages) < 0)
		space = 0;

	inode = object->backer->d_inode;
	ASSERT(S_ISREG(inode->i_mode));
	ASSERT(inode->i_mapping->a_ops->bmap);
	ASSERT(inode->i_mapping->a_ops->readpages);

	/* calculate the shift required to use bmap */
	if (inode->i_sb->s_blocksize > PAGE_SIZE)
		return -ENOBUFS;

	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;

	pagevec_init(&pagevec, 0);

	cookie = object->fscache.cookie;

	INIT_LIST_HEAD(&backpages);
	nrbackpages = 0;

	ret = space ? -ENODATA : -ENOBUFS;
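	/* assume no page is backed until proven otherwise: -ENODATA if we
	 * can at least reserve space to cache them, -ENOBUFS if we can't */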
	list_for_each_entry_safe(page, _n, pages, lru) {
		sector_t block0, block;

		/* we assume the absence or presence of the first block is a
		 * good enough indication for the page as a whole
		 * - TODO: don't use bmap() for this as it is _not_ actually
		 *   good enough for this as it doesn't indicate errors, but
		 *   it's all we've got for the moment
		 */
		block0 = page->index;
		block0 <<= shift;

		block = inode->i_mapping->a_ops->bmap(inode->i_mapping,
						      block0);
		_debug("%llx -> %llx", block0, block);

		if (block) {
			/* we have data - add it to the list to give to the
			 * backing fs */
			list_move(&page->lru, &backpages);
			(*nr_pages)--;
			nrbackpages++;
		} else if (space && pagevec_add(&pagevec, page) == 0) {
			cookie->def->mark_pages_cached(cookie->netfs_data,
						       mapping, &pagevec);
			pagevec_reinit(&pagevec);
			ret = -ENODATA;
		}
	}

	if (pagevec_count(&pagevec) > 0) {
		cookie->def->mark_pages_cached(cookie->netfs_data,
					       mapping, &pagevec);
		pagevec_reinit(&pagevec);
	}

	if (list_empty(pages))
		ret = 0;

	/* submit the apparently valid pages to the backing fs to be read
	 * from disk */
	if (nrbackpages > 0) {
		ret2 = cachefiles_read_backing_file(object,
						    end_io_func,
						    context,
						    mapping,
						    &backpages,
						    &pagevec);

		ASSERTCMP(pagevec_count(&pagevec), ==, 0);

		if (ret2 == -ENOMEM || ret2 == -EINTR)
			ret = ret2;
	}

	_leave(" = %d [nr=%u%s]",
	       ret, *nr_pages, list_empty(pages) ? " empty" : "");
	return ret;

}

/*****************************************************************************/
/*
 * allocate a block in the cache in which to store a page
 * - cache withdrawal is prevented by the caller
 * - returns -EINTR if interrupted
 * - returns -ENOMEM if we ran out of memory
 * - returns -ENOBUFS if no buffers can be made available
 * - returns -ENOBUFS if page is beyond EOF
 * - otherwise:
 *   - the metadata will be retained
 *   - 0 will be returned
 */
static int cachefiles_allocate_page(struct fscache_object *_object,
				    struct page *page,
				    unsigned long gfp)
{
	struct cachefiles_object *object;
	struct cachefiles_cache *cache;

	object = container_of(_object, struct cachefiles_object, fscache);
	cache = container_of(object->fscache.cache,
			     struct cachefiles_cache, cache);

	_enter("%p,{%lx},,,", object, page->index);

	return cachefiles_has_space(cache, 1);

}

/*****************************************************************************/
/*
 * page storer
 */
void cachefiles_write_work(void *_object)
{
	struct cachefiles_one_write *writer;
	struct cachefiles_object *object = _object;
	int ret, max;

	_enter("%p", object);

	ASSERT(!irqs_disabled());

	spin_lock_irq(&object->work_lock);
	max = 8;

	while (!list_empty(&object->write_list)) {
		writer = list_entry(object->write_list.next,
				    struct cachefiles_one_write, obj_link);
		list_del(&writer->obj_link);

		spin_unlock_irq(&object->work_lock);

		_debug("- store {%lu}", writer->netfs_page->index);

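		/* generic_file_buffered_write_one_kernel_page() is a helper
		 * added alongside these patches (presumably in mm/filemap.c)
		 * that copies a single kernel page into the backing file's
		 * pagecache at the given index */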
		ret = generic_file_buffered_write_one_kernel_page(
			object->backer->d_inode->i_mapping,
			writer->netfs_page->index,
			writer->netfs_page);

		if (ret == -ENOSPC) {
			ret = -ENOBUFS;
		} else if (ret == -EIO) {
			cachefiles_io_error_obj(object,
						"write page to backing file"
						" failed");
			ret = -ENOBUFS;
		}

		_debug("- callback");
		writer->end_io_func(writer->netfs_page,
				    writer->context,
				    ret);

		_debug("- put net");
		page_cache_release(writer->netfs_page);
		fscache_put_context(object->fscache.cookie, writer->context);
		kfree(writer);

		/* let keventd have some air occasionally */
		max--;
		if (max < 0 || need_resched()) {
			if (!list_empty(&object->write_list))
				schedule_work(&object->write_work);
			_leave(" [maxed out]");
			return;
		}

		_debug("- next");
		spin_lock_irq(&object->work_lock);
	}

	spin_unlock_irq(&object->work_lock);
	_leave("");

}

/*****************************************************************************/
/*
 * request a page be stored in the cache
 * - cache withdrawal is prevented by the caller
 * - this request may be ignored if there's no cache block available, in which
 *   case -ENOBUFS will be returned
 * - if the op is in progress, 0 will be returned
 */
static int cachefiles_write_page(struct fscache_object *_object,
				 struct page *page,
				 fscache_rw_complete_t end_io_func,
				 void *context,
				 unsigned long gfp)
{
//	struct cachefiles_one_write *writer;
	struct cachefiles_object *object;
	int ret;

	object = container_of(_object, struct cachefiles_object, fscache);

	_enter("%p,%p{%lx},,,", object, page, page->index);

	if (!object->backer)
		return -ENOBUFS;

	ASSERT(S_ISREG(object->backer->d_inode->i_mode));

#if 0 // set to 1 for deferred writing
	/* queue the operation for deferred processing by keventd */
	writer = kzalloc(sizeof(*writer), GFP_KERNEL);
	if (!writer)
		return -ENOMEM;

	page_cache_get(page);
	writer->netfs_page = page;
	writer->object = object;
	writer->end_io_func = end_io_func;
	writer->context = fscache_get_context(object->fscache.cookie, context);

	spin_lock_irq(&object->work_lock);
	list_add_tail(&writer->obj_link, &object->write_list);
	spin_unlock_irq(&object->work_lock);

	schedule_work(&object->write_work);
	ret = 0;

#else
	/* copy the page to ext3 and let it store it in its own time */
	ret = generic_file_buffered_write_one_kernel_page(
		object->backer->d_inode->i_mapping, page->index, page);

	if (ret != 0) {
		if (ret == -EIO)
			cachefiles_io_error_obj(object,
						"write page to backing file"
						" failed");
		ret = -ENOBUFS;
	}

	end_io_func(page, context, ret);
#endif

	_leave(" = %d", ret);
	return ret;

}

/*****************************************************************************/
/*
 * detach a backing block from a page
 * - cache withdrawal is prevented by the caller
 */
static void cachefiles_uncache_pages(struct fscache_object *_object,
				     struct pagevec *pagevec)
{
	struct cachefiles_object *object;
	struct cachefiles_cache *cache;

	object = container_of(_object, struct cachefiles_object, fscache);
	cache = container_of(object->fscache.cache,
			     struct cachefiles_cache, cache);

	_enter("%p,{%lu,%lx},,,",
	       object, pagevec->nr, pagevec->pages[0]->index);

}

/*****************************************************************************/
/*
 * dissociate a cache from all the pages it was backing
 */
static void cachefiles_dissociate_pages(struct fscache_cache *cache)
{
	_enter("");

}

struct fscache_cache_ops cachefiles_cache_ops = {
	.name			= "cachefiles",
	.lookup_object		= cachefiles_lookup_object,
	.grab_object		= cachefiles_grab_object,
	.lock_object		= cachefiles_lock_object,
	.unlock_object		= cachefiles_unlock_object,
	.update_object		= cachefiles_update_object,
	.put_object		= cachefiles_put_object,
	.sync_cache		= cachefiles_sync_cache,
	.set_i_size		= cachefiles_set_i_size,
	.read_or_alloc_page	= cachefiles_read_or_alloc_page,
	.read_or_alloc_pages	= cachefiles_read_or_alloc_pages,
	.allocate_page		= cachefiles_allocate_page,
	.write_page		= cachefiles_write_page,
	.uncache_pages		= cachefiles_uncache_pages,
	.dissociate_pages	= cachefiles_dissociate_pages,
};
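
/* For reference, a rough sketch of how this ops table reaches FS-Cache when
 * the cachefilesd daemon brings a cache online - hypothetical, modelled on
 * the later merged FS-Cache API rather than on anything in this file:
 *
 *	fscache_init_cache(&cache->cache, &cachefiles_cache_ops,
 *			   "cachefiles:%s", cache->tag);
 *	ret = fscache_add_cache(&cache->cache, &fsdef->fscache, cache->tag);
 *
 * after which FS-Cache calls back through these operations on behalf of the
 * netfs */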