1 /*
2  * net/sunrpc/cache.c
3  *
4  * Generic code for various authentication-related caches
5  * used by sunrpc clients and servers.
6  *
7  * Copyright (C) 2002 Neil Brown <neilb@cse.unsw.edu.au>
8  *
9  * Released under terms in GPL version 2.  See COPYING.
10  *
11  */
12
13 #include <linux/types.h>
14 #include <linux/fs.h>
15 #include <linux/file.h>
16 #include <linux/slab.h>
17 #include <linux/signal.h>
18 #include <linux/sched.h>
19 #include <linux/kmod.h>
20 #include <linux/list.h>
21 #include <linux/module.h>
22 #include <linux/ctype.h>
23 #include <asm/uaccess.h>
24 #include <linux/poll.h>
25 #include <linux/seq_file.h>
26 #include <linux/proc_fs.h>
27 #include <linux/net.h>
28 #include <linux/workqueue.h>
29 #include <linux/mutex.h>
30 #include <asm/ioctls.h>
31 #include <linux/sunrpc/types.h>
32 #include <linux/sunrpc/cache.h>
33 #include <linux/sunrpc/stats.h>
34
35 #define  RPCDBG_FACILITY RPCDBG_CACHE
36
37 static void cache_defer_req(struct cache_req *req, struct cache_head *item);
38 static void cache_revisit_request(struct cache_head *item);
39
40 static void cache_init(struct cache_head *h)
41 {
42         time_t now = get_seconds();
43         h->next = NULL;
44         h->flags = 0;
45         kref_init(&h->ref);
46         h->expiry_time = now + CACHE_NEW_EXPIRY;
47         h->last_refresh = now;
48 }
49
50 struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
51                                        struct cache_head *key, int hash)
52 {
53         struct cache_head **head,  **hp;
54         struct cache_head *new = NULL;
55
56         head = &detail->hash_table[hash];
57
58         read_lock(&detail->hash_lock);
59
60         for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
61                 struct cache_head *tmp = *hp;
62                 if (detail->match(tmp, key)) {
63                         cache_get(tmp);
64                         read_unlock(&detail->hash_lock);
65                         return tmp;
66                 }
67         }
68         read_unlock(&detail->hash_lock);
69         /* Didn't find anything, insert an empty entry */
70
71         new = detail->alloc();
72         if (!new)
73                 return NULL;
74         cache_init(new);
75
76         write_lock(&detail->hash_lock);
77
78         /* check if entry appeared while we slept */
79         for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
80                 struct cache_head *tmp = *hp;
81                 if (detail->match(tmp, key)) {
82                         cache_get(tmp);
83                         write_unlock(&detail->hash_lock);
84                         cache_put(new, detail);
85                         return tmp;
86                 }
87         }
88         detail->init(new, key);
89         new->next = *head;
90         *head = new;
91         detail->entries++;
92         cache_get(new);
93         write_unlock(&detail->hash_lock);
94
95         return new;
96 }
97 EXPORT_SYMBOL(sunrpc_cache_lookup);
98
99
100 static void queue_loose(struct cache_detail *detail, struct cache_head *ch);
101
102 static int cache_fresh_locked(struct cache_head *head, time_t expiry)
103 {
104         head->expiry_time = expiry;
105         head->last_refresh = get_seconds();
106         return !test_and_set_bit(CACHE_VALID, &head->flags);
107 }
108
109 static void cache_fresh_unlocked(struct cache_head *head,
110                         struct cache_detail *detail, int new)
111 {
112         if (new)
113                 cache_revisit_request(head);
114         if (test_and_clear_bit(CACHE_PENDING, &head->flags)) {
115                 cache_revisit_request(head);
116                 queue_loose(detail, head);
117         }
118 }
119
120 struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
121                                        struct cache_head *new, struct cache_head *old, int hash)
122 {
123         /* The 'old' entry is to be replaced by 'new'.
124          * If 'old' is not VALID, we update it directly,
125          * otherwise we need to replace it
126          */
127         struct cache_head **head;
128         struct cache_head *tmp;
129         int is_new;
130
131         if (!test_bit(CACHE_VALID, &old->flags)) {
132                 write_lock(&detail->hash_lock);
133                 if (!test_bit(CACHE_VALID, &old->flags)) {
134                         if (test_bit(CACHE_NEGATIVE, &new->flags))
135                                 set_bit(CACHE_NEGATIVE, &old->flags);
136                         else
137                                 detail->update(old, new);
138                         is_new = cache_fresh_locked(old, new->expiry_time);
139                         write_unlock(&detail->hash_lock);
140                         cache_fresh_unlocked(old, detail, is_new);
141                         return old;
142                 }
143                 write_unlock(&detail->hash_lock);
144         }
145         /* We need to insert a new entry */
146         tmp = detail->alloc();
147         if (!tmp) {
148                 cache_put(old, detail);
149                 return NULL;
150         }
151         cache_init(tmp);
152         detail->init(tmp, old);
153         head = &detail->hash_table[hash];
154
155         write_lock(&detail->hash_lock);
156         if (test_bit(CACHE_NEGATIVE, &new->flags))
157                 set_bit(CACHE_NEGATIVE, &tmp->flags);
158         else
159                 detail->update(tmp, new);
160         tmp->next = *head;
161         *head = tmp;
162         detail->entries++;
163         cache_get(tmp);
164         is_new = cache_fresh_locked(tmp, new->expiry_time);
165         cache_fresh_locked(old, 0);
166         write_unlock(&detail->hash_lock);
167         cache_fresh_unlocked(tmp, detail, is_new);
168         cache_fresh_unlocked(old, detail, 0);
169         cache_put(old, detail);
170         return tmp;
171 }
172 EXPORT_SYMBOL(sunrpc_cache_update);
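/*
 * Illustrative sketch only (all names hypothetical): a cache's
 * ->cache_parse method typically fills a template entry from the
 * downcall and then lets sunrpc_cache_update install it:
 *
 *	item = sunrpc_cache_lookup(cd, &key.h, hash);
 *	if (item == NULL)
 *		return -ENOMEM;
 *	tmpl.h.expiry_time = expiry;
 *	if (reply_was_negative)
 *		set_bit(CACHE_NEGATIVE, &tmpl.h.flags);
 *	item = sunrpc_cache_update(cd, &tmpl.h, item, hash);
 *
 * sunrpc_cache_update consumes the caller's reference on the old entry
 * and returns a referenced (possibly different) entry, or NULL if
 * allocation fails.
 */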
173
174 static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h);
175 /*
176  * This is the generic cache management routine for all
177  * the authentication caches.
178  * It checks the currency of a cache item and will (later)
179  * initiate an upcall to fill it if needed.
180  *
181  *
182  * Returns 0 if the cache_head can be used; otherwise it cache_puts
183  * the entry and returns -EAGAIN if an upcall is pending,
184  * or -ENOENT if the cache entry was negative.
185  */
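/*
 * Illustrative only (caller names assumed): a typical server-side user
 * pairs a referenced lookup with cache_check:
 *
 *	switch (cache_check(cd, item, &rqstp->rq_chandle)) {
 *	case 0:		// valid: use the entry, then cache_put(item, cd)
 *	case -ENOENT:	// negative entry; reference already dropped
 *	case -EAGAIN:	// upcall pending; request deferred, reference dropped
 *	}
 *
 * Note that cache_check drops the caller's reference itself whenever it
 * returns non-zero.
 */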
186 int cache_check(struct cache_detail *detail,
187                     struct cache_head *h, struct cache_req *rqstp)
188 {
189         int rv;
190         long refresh_age, age;
191
192         /* First decide return status as best we can */
193         if (!test_bit(CACHE_VALID, &h->flags) ||
194             h->expiry_time < get_seconds())
195                 rv = -EAGAIN;
196         else if (detail->flush_time > h->last_refresh)
197                 rv = -EAGAIN;
198         else {
199                 /* entry is valid */
200                 if (test_bit(CACHE_NEGATIVE, &h->flags))
201                         rv = -ENOENT;
202                 else rv = 0;
203         }
204
205         /* now see if we want to start an upcall */
206         refresh_age = (h->expiry_time - h->last_refresh);
207         age = get_seconds() - h->last_refresh;
208
209         if (rqstp == NULL) {
210                 if (rv == -EAGAIN)
211                         rv = -ENOENT;
212         } else if (rv == -EAGAIN || age > refresh_age/2) {
213                 dprintk("Want update, refage=%ld, age=%ld\n", refresh_age, age);
214                 if (!test_and_set_bit(CACHE_PENDING, &h->flags)) {
215                         switch (cache_make_upcall(detail, h)) {
216                         case -EINVAL:
217                                 clear_bit(CACHE_PENDING, &h->flags);
218                                 if (rv == -EAGAIN) {
219                                         set_bit(CACHE_NEGATIVE, &h->flags);
220                                         cache_fresh_unlocked(h, detail,
221                                              cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY));
222                                         rv = -ENOENT;
223                                 }
224                                 break;
225
226                         case -EAGAIN:
227                                 clear_bit(CACHE_PENDING, &h->flags);
228                                 cache_revisit_request(h);
229                                 break;
230                         }
231                 }
232         }
233
234         if (rv == -EAGAIN)
235                 cache_defer_req(rqstp, h);
236
237         if (rv)
238                 cache_put(h, detail);
239         return rv;
240 }
241
242 /*
243  * caches need to be periodically cleaned.
244  * For this we maintain a list of cache_detail and
245  * a current pointer into that list and into the table
246  * for that entry.
247  *
248  * Each time cache_clean is called it finds the next non-empty entry
249  * in the current table and walks the list in that entry
250  * looking for entries that can be removed.
251  *
252  * An entry gets removed if:
253  * - The expiry is before current time
254  * - The last_refresh time is before the flush_time for that cache
255  *
256  * later we might drop old entries with non-NEVER expiry if that table
257  * is getting 'full' for some definition of 'full'
258  *
259  * The question of "how often to scan a table" is an interesting one
260  * and is answered in part by the use of the "nextcheck" field in the
261  * cache_detail.
262  * When a scan of a table begins, the nextcheck field is set to a time
263  * that is well into the future.
264  * While scanning, if an expiry time is found that is earlier than the
265  * current nextcheck time, nextcheck is set to that expiry time.
266  * If the flush_time is ever set to a time earlier than the nextcheck
267  * time, the nextcheck time is then set to that flush_time.
268  *
269  * A table is then only scanned if the current time is at least
270  * the nextcheck time.
271  * 
272  */
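/*
 * A worked example of the nextcheck logic (times illustrative): a scan
 * starting at t=1000 first sets nextcheck to 1000+30*60.  If the bucket
 * walk then sees entries expiring at t=1500 and t=1200, nextcheck drops
 * to 1501 and then 1201, so this table is not rescanned before t=1201
 * unless the 'flush' file is written, which resets nextcheck to "now".
 */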
273
274 static LIST_HEAD(cache_list);
275 static DEFINE_SPINLOCK(cache_list_lock);
276 static struct cache_detail *current_detail;
277 static int current_index;
278
279 static struct file_operations cache_file_operations;
280 static struct file_operations content_file_operations;
281 static struct file_operations cache_flush_operations;
282
283 static void do_cache_clean(void *data);
284 static DECLARE_WORK(cache_cleaner, do_cache_clean, NULL);
285
286 void cache_register(struct cache_detail *cd)
287 {
288         cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc);
289         if (cd->proc_ent) {
290                 struct proc_dir_entry *p;
291                 cd->proc_ent->owner = cd->owner;
292                 cd->channel_ent = cd->content_ent = NULL;
293                 
294                 p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR,
295                                       cd->proc_ent);
296                 cd->flush_ent =  p;
297                 if (p) {
298                         p->proc_fops = &cache_flush_operations;
299                         p->owner = cd->owner;
300                         p->data = cd;
301                 }
302  
303                 if (cd->cache_request || cd->cache_parse) {
304                         p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR,
305                                               cd->proc_ent);
306                         cd->channel_ent = p;
307                         if (p) {
308                                 p->proc_fops = &cache_file_operations;
309                                 p->owner = cd->owner;
310                                 p->data = cd;
311                         }
312                 }
313                 if (cd->cache_show) {
314                         p = create_proc_entry("content", S_IFREG|S_IRUSR|S_IWUSR,
315                                               cd->proc_ent);
316                         cd->content_ent = p;
317                         if (p) {
318                                 p->proc_fops = &content_file_operations;
319                                 p->owner = cd->owner;
320                                 p->data = cd;
321                         }
322                 }
323         }
324         rwlock_init(&cd->hash_lock);
325         INIT_LIST_HEAD(&cd->queue);
326         spin_lock(&cache_list_lock);
327         cd->nextcheck = 0;
328         cd->entries = 0;
329         atomic_set(&cd->readers, 0);
330         cd->last_close = 0;
331         cd->last_warn = -1;
332         list_add(&cd->others, &cache_list);
333         spin_unlock(&cache_list_lock);
334
335         /* start the cleaning process */
336         schedule_work(&cache_cleaner);
337 }
338
339 int cache_unregister(struct cache_detail *cd)
340 {
341         cache_purge(cd);
342         spin_lock(&cache_list_lock);
343         write_lock(&cd->hash_lock);
344         if (cd->entries || atomic_read(&cd->inuse)) {
345                 write_unlock(&cd->hash_lock);
346                 spin_unlock(&cache_list_lock);
347                 return -EBUSY;
348         }
349         if (current_detail == cd)
350                 current_detail = NULL;
351         list_del_init(&cd->others);
352         write_unlock(&cd->hash_lock);
353         spin_unlock(&cache_list_lock);
354         if (cd->proc_ent) {
355                 if (cd->flush_ent)
356                         remove_proc_entry("flush", cd->proc_ent);
357                 if (cd->channel_ent)
358                         remove_proc_entry("channel", cd->proc_ent);
359                 if (cd->content_ent)
360                         remove_proc_entry("content", cd->proc_ent);
361
362                 cd->proc_ent = NULL;
363                 remove_proc_entry(cd->name, proc_net_rpc);
364         }
365         if (list_empty(&cache_list)) {
366                 /* module must be being unloaded so it's safe to kill the worker */
367                 cancel_delayed_work(&cache_cleaner);
368                 flush_scheduled_work();
369         }
370         return 0;
371 }
372
373 /* cache_clean tries to find something to clean
374  * and cleans it.
375  * It returns 1 if it cleaned something,
376  *            0 if it didn't find anything this time
377  *           -1 if it fell off the end of the list.
378  */
379 static int cache_clean(void)
380 {
381         int rv = 0;
382         struct list_head *next;
383
384         spin_lock(&cache_list_lock);
385
386         /* find a suitable table if we don't already have one */
387         while (current_detail == NULL ||
388             current_index >= current_detail->hash_size) {
389                 if (current_detail)
390                         next = current_detail->others.next;
391                 else
392                         next = cache_list.next;
393                 if (next == &cache_list) {
394                         current_detail = NULL;
395                         spin_unlock(&cache_list_lock);
396                         return -1;
397                 }
398                 current_detail = list_entry(next, struct cache_detail, others);
399                 if (current_detail->nextcheck > get_seconds())
400                         current_index = current_detail->hash_size;
401                 else {
402                         current_index = 0;
403                         current_detail->nextcheck = get_seconds()+30*60;
404                 }
405         }
406
407         /* find a non-empty bucket in the table */
408         while (current_detail &&
409                current_index < current_detail->hash_size &&
410                current_detail->hash_table[current_index] == NULL)
411                 current_index++;
412
413         /* find a cleanable entry in the bucket and clean it, or move on to the next bucket */
414         
415         if (current_detail && current_index < current_detail->hash_size) {
416                 struct cache_head *ch, **cp;
417                 struct cache_detail *d;
418                 
419                 write_lock(&current_detail->hash_lock);
420
421                 /* Ok, now to clean this strand */
422                         
423                 cp = & current_detail->hash_table[current_index];
424                 ch = *cp;
425                 for (; ch; cp= & ch->next, ch= *cp) {
426                         if (current_detail->nextcheck > ch->expiry_time)
427                                 current_detail->nextcheck = ch->expiry_time+1;
428                         if (ch->expiry_time >= get_seconds()
429                             && ch->last_refresh >= current_detail->flush_time
430                                 )
431                                 continue;
432                         if (test_and_clear_bit(CACHE_PENDING, &ch->flags))
433                                 queue_loose(current_detail, ch);
434
435                         if (atomic_read(&ch->ref.refcount) == 1)
436                                 break;
437                 }
438                 if (ch) {
439                         *cp = ch->next;
440                         ch->next = NULL;
441                         current_detail->entries--;
442                         rv = 1;
443                 }
444                 write_unlock(&current_detail->hash_lock);
445                 d = current_detail;
446                 if (!ch)
447                         current_index ++;
448                 spin_unlock(&cache_list_lock);
449                 if (ch)
450                         cache_put(ch, d);
451         } else
452                 spin_unlock(&cache_list_lock);
453
454         return rv;
455 }
456
457 /*
458  * We want to regularly clean the cache, so we need to schedule some work ...
459  */
460 static void do_cache_clean(void *data)
461 {
462         int delay = 5;
463         if (cache_clean() == -1)
464                 delay = 30*HZ;
465
466         if (list_empty(&cache_list))
467                 delay = 0;
468
469         if (delay)
470                 schedule_delayed_work(&cache_cleaner, delay);
471 }
472
473
474 /* 
475  * Clean all caches promptly.  This just calls cache_clean
476  * repeatedly until we are sure that every cache has had a chance to 
477  * be fully cleaned
478  */
479 void cache_flush(void)
480 {
481         while (cache_clean() != -1)
482                 cond_resched();
483         while (cache_clean() != -1)
484                 cond_resched();
485 }
486
487 void cache_purge(struct cache_detail *detail)
488 {
489         detail->flush_time = LONG_MAX;
490         detail->nextcheck = get_seconds();
491         cache_flush();
492         detail->flush_time = 1;
493 }
494
495
496
497 /*
498  * Deferral and Revisiting of Requests.
499  *
500  * If a cache lookup finds a pending entry, we
501  * need to defer the request and revisit it later.
502  * All deferred requests are stored in a hash table,
503  * indexed by "struct cache_head *".
504  * As it may be wasteful to store a whole request
505  * structure, we allow the request to provide a 
506  * deferred form, which must contain a
507  * 'struct cache_deferred_req'.
508  * This cache_deferred_req contains a method to allow
509  * it to be revisited when cache info is available.
510  */
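/*
 * Illustrative sketch (names hypothetical): a transport embeds the
 * cache_deferred_req in its own deferred-request record and returns the
 * embedded struct from its ->defer method:
 *
 *	struct my_deferred {
 *		struct cache_deferred_req handle;	// stored in the hash here
 *		// ... saved request state ...
 *	};
 *	static struct cache_deferred_req *my_defer(struct cache_req *req)
 *	{
 *		struct my_deferred *dr = save_request_somehow(req);
 *		if (dr == NULL)
 *			return NULL;
 *		dr->handle.owner = my_owner_cookie;	// matched by cache_clean_deferred
 *		dr->handle.revisit = my_revisit;	// called when cache info arrives
 *		return &dr->handle;
 *	}
 */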
511
512 #define DFR_HASHSIZE    (PAGE_SIZE/sizeof(struct list_head))
513 #define DFR_HASH(item)  ((((long)item)>>4 ^ (((long)item)>>13)) % DFR_HASHSIZE)
514
515 #define DFR_MAX 300     /* ??? */
516
517 static DEFINE_SPINLOCK(cache_defer_lock);
518 static LIST_HEAD(cache_defer_list);
519 static struct list_head cache_defer_hash[DFR_HASHSIZE];
520 static int cache_defer_cnt;
521
522 static void cache_defer_req(struct cache_req *req, struct cache_head *item)
523 {
524         struct cache_deferred_req *dreq;
525         int hash = DFR_HASH(item);
526
527         dreq = req->defer(req);
528         if (dreq == NULL)
529                 return;
530
531         dreq->item = item;
532         dreq->recv_time = get_seconds();
533
534         spin_lock(&cache_defer_lock);
535
536         list_add(&dreq->recent, &cache_defer_list);
537
538         if (cache_defer_hash[hash].next == NULL)
539                 INIT_LIST_HEAD(&cache_defer_hash[hash]);
540         list_add(&dreq->hash, &cache_defer_hash[hash]);
541
542         /* it is in, now maybe clean up */
543         dreq = NULL;
544         if (++cache_defer_cnt > DFR_MAX) {
545                 /* too much in the cache, randomly drop
546                  * first or last
547                  */
548                 if (net_random()&1) 
549                         dreq = list_entry(cache_defer_list.next,
550                                           struct cache_deferred_req,
551                                           recent);
552                 else
553                         dreq = list_entry(cache_defer_list.prev,
554                                           struct cache_deferred_req,
555                                           recent);
556                 list_del(&dreq->recent);
557                 list_del(&dreq->hash);
558                 cache_defer_cnt--;
559         }
560         spin_unlock(&cache_defer_lock);
561
562         if (dreq) {
563                 /* there was one too many */
564                 dreq->revisit(dreq, 1);
565         }
566         if (!test_bit(CACHE_PENDING, &item->flags)) {
567                 /* must have just been validated... */
568                 cache_revisit_request(item);
569         }
570 }
571
572 static void cache_revisit_request(struct cache_head *item)
573 {
574         struct cache_deferred_req *dreq;
575         struct list_head pending;
576
577         struct list_head *lp;
578         int hash = DFR_HASH(item);
579
580         INIT_LIST_HEAD(&pending);
581         spin_lock(&cache_defer_lock);
582         
583         lp = cache_defer_hash[hash].next;
584         if (lp) {
585                 while (lp != &cache_defer_hash[hash]) {
586                         dreq = list_entry(lp, struct cache_deferred_req, hash);
587                         lp = lp->next;
588                         if (dreq->item == item) {
589                                 list_del(&dreq->hash);
590                                 list_move(&dreq->recent, &pending);
591                                 cache_defer_cnt--;
592                         }
593                 }
594         }
595         spin_unlock(&cache_defer_lock);
596
597         while (!list_empty(&pending)) {
598                 dreq = list_entry(pending.next, struct cache_deferred_req, recent);
599                 list_del_init(&dreq->recent);
600                 dreq->revisit(dreq, 0);
601         }
602 }
603
604 void cache_clean_deferred(void *owner)
605 {
606         struct cache_deferred_req *dreq, *tmp;
607         struct list_head pending;
608
609
610         INIT_LIST_HEAD(&pending);
611         spin_lock(&cache_defer_lock);
612         
613         list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
614                 if (dreq->owner == owner) {
615                         list_del(&dreq->hash);
616                         list_move(&dreq->recent, &pending);
617                         cache_defer_cnt--;
618                 }
619         }
620         spin_unlock(&cache_defer_lock);
621
622         while (!list_empty(&pending)) {
623                 dreq = list_entry(pending.next, struct cache_deferred_req, recent);
624                 list_del_init(&dreq->recent);
625                 dreq->revisit(dreq, 1);
626         }
627 }
628
629 /*
630  * communicate with user-space
631  *
632  * We have a magic /proc file per cache - /proc/net/rpc/<cachename>/channel
633  * On read, you get a full request, or block.
634  * On write, an update request is processed.
635  * Poll reports readable whenever a request is queued, and always allows write.
636  *
637  * Implemented by linked list of requests.  Each open file has 
638  * a ->private that also exists in this list.  New requests are added
639  * to the end and may wake up any preceding readers.
640  * New readers are added to the head.  If, on read, an item is found with
641  * CACHE_PENDING clear, we free it from the list.
642  *
643  */
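/*
 * Illustrative sketch of the user-space side (hypothetical daemon code,
 * not part of this file): a daemon opens the per-cache channel file,
 * reads whole requests and writes back whole replies:
 *
 *	int fd = open("/proc/net/rpc/CACHENAME/channel", O_RDWR);
 *	for (;;) {
 *		char req[8192], reply[8192];
 *		int n = read(fd, req, sizeof(req));	// one complete request
 *		if (n <= 0)
 *			continue;
 *		// ... resolve the request, format a reply with an expiry ...
 *		write(fd, reply, strlen(reply));	// one complete reply
 *	}
 */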
644
645 static DEFINE_SPINLOCK(queue_lock);
646 static DEFINE_MUTEX(queue_io_mutex);
647
648 struct cache_queue {
649         struct list_head        list;
650         int                     reader; /* if 0, then request */
651 };
652 struct cache_request {
653         struct cache_queue      q;
654         struct cache_head       *item;
655         char                    * buf;
656         int                     len;
657         int                     readers;
658 };
659 struct cache_reader {
660         struct cache_queue      q;
661         int                     offset; /* if non-0, we have a refcnt on next request */
662 };
663
664 static ssize_t
665 cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
666 {
667         struct cache_reader *rp = filp->private_data;
668         struct cache_request *rq;
669         struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data;
670         int err;
671
672         if (count == 0)
673                 return 0;
674
675         mutex_lock(&queue_io_mutex); /* protect against multiple concurrent
676                               * readers on this file */
677  again:
678         spin_lock(&queue_lock);
679         /* need to find next request */
680         while (rp->q.list.next != &cd->queue &&
681                list_entry(rp->q.list.next, struct cache_queue, list)
682                ->reader) {
683                 struct list_head *next = rp->q.list.next;
684                 list_move(&rp->q.list, next);
685         }
686         if (rp->q.list.next == &cd->queue) {
687                 spin_unlock(&queue_lock);
688                 mutex_unlock(&queue_io_mutex);
689                 BUG_ON(rp->offset);
690                 return 0;
691         }
692         rq = container_of(rp->q.list.next, struct cache_request, q.list);
693         BUG_ON(rq->q.reader);
694         if (rp->offset == 0)
695                 rq->readers++;
696         spin_unlock(&queue_lock);
697
698         if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) {
699                 err = -EAGAIN;
700                 spin_lock(&queue_lock);
701                 list_move(&rp->q.list, &rq->q.list);
702                 spin_unlock(&queue_lock);
703         } else {
704                 if (rp->offset + count > rq->len)
705                         count = rq->len - rp->offset;
706                 err = -EFAULT;
707                 if (copy_to_user(buf, rq->buf + rp->offset, count))
708                         goto out;
709                 rp->offset += count;
710                 if (rp->offset >= rq->len) {
711                         rp->offset = 0;
712                         spin_lock(&queue_lock);
713                         list_move(&rp->q.list, &rq->q.list);
714                         spin_unlock(&queue_lock);
715                 }
716                 err = 0;
717         }
718  out:
719         if (rp->offset == 0) {
720                 /* need to release rq */
721                 spin_lock(&queue_lock);
722                 rq->readers--;
723                 if (rq->readers == 0 &&
724                     !test_bit(CACHE_PENDING, &rq->item->flags)) {
725                         list_del(&rq->q.list);
726                         spin_unlock(&queue_lock);
727                         cache_put(rq->item, cd);
728                         kfree(rq->buf);
729                         kfree(rq);
730                 } else
731                         spin_unlock(&queue_lock);
732         }
733         if (err == -EAGAIN)
734                 goto again;
735         mutex_unlock(&queue_io_mutex);
736         return err ? err :  count;
737 }
738
739 static char write_buf[8192]; /* protected by queue_io_mutex */
740
741 static ssize_t
742 cache_write(struct file *filp, const char __user *buf, size_t count,
743             loff_t *ppos)
744 {
745         int err;
746         struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data;
747
748         if (count == 0)
749                 return 0;
750         if (count >= sizeof(write_buf))
751                 return -EINVAL;
752
753         mutex_lock(&queue_io_mutex);
754
755         if (copy_from_user(write_buf, buf, count)) {
756                 mutex_unlock(&queue_io_mutex);
757                 return -EFAULT;
758         }
759         write_buf[count] = '\0';
760         if (cd->cache_parse)
761                 err = cd->cache_parse(cd, write_buf, count);
762         else
763                 err = -EINVAL;
764
765         mutex_unlock(&queue_io_mutex);
766         return err ? err : count;
767 }
768
769 static DECLARE_WAIT_QUEUE_HEAD(queue_wait);
770
771 static unsigned int
772 cache_poll(struct file *filp, poll_table *wait)
773 {
774         unsigned int mask;
775         struct cache_reader *rp = filp->private_data;
776         struct cache_queue *cq;
777         struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data;
778
779         poll_wait(filp, &queue_wait, wait);
780
781         /* always allow write */
782         mask = POLLOUT | POLLWRNORM;
783
784         if (!rp)
785                 return mask;
786
787         spin_lock(&queue_lock);
788
789         for (cq= &rp->q; &cq->list != &cd->queue;
790              cq = list_entry(cq->list.next, struct cache_queue, list))
791                 if (!cq->reader) {
792                         mask |= POLLIN | POLLRDNORM;
793                         break;
794                 }
795         spin_unlock(&queue_lock);
796         return mask;
797 }
798
799 static int
800 cache_ioctl(struct inode *ino, struct file *filp,
801             unsigned int cmd, unsigned long arg)
802 {
803         int len = 0;
804         struct cache_reader *rp = filp->private_data;
805         struct cache_queue *cq;
806         struct cache_detail *cd = PDE(ino)->data;
807
808         if (cmd != FIONREAD || !rp)
809                 return -EINVAL;
810
811         spin_lock(&queue_lock);
812
813         /* only find the length remaining in current request,
814          * or the length of the next request
815          */
816         for (cq= &rp->q; &cq->list != &cd->queue;
817              cq = list_entry(cq->list.next, struct cache_queue, list))
818                 if (!cq->reader) {
819                         struct cache_request *cr =
820                                 container_of(cq, struct cache_request, q);
821                         len = cr->len - rp->offset;
822                         break;
823                 }
824         spin_unlock(&queue_lock);
825
826         return put_user(len, (int __user *)arg);
827 }
828
829 static int
830 cache_open(struct inode *inode, struct file *filp)
831 {
832         struct cache_reader *rp = NULL;
833
834         nonseekable_open(inode, filp);
835         if (filp->f_mode & FMODE_READ) {
836                 struct cache_detail *cd = PDE(inode)->data;
837
838                 rp = kmalloc(sizeof(*rp), GFP_KERNEL);
839                 if (!rp)
840                         return -ENOMEM;
841                 rp->offset = 0;
842                 rp->q.reader = 1;
843                 atomic_inc(&cd->readers);
844                 spin_lock(&queue_lock);
845                 list_add(&rp->q.list, &cd->queue);
846                 spin_unlock(&queue_lock);
847         }
848         filp->private_data = rp;
849         return 0;
850 }
851
852 static int
853 cache_release(struct inode *inode, struct file *filp)
854 {
855         struct cache_reader *rp = filp->private_data;
856         struct cache_detail *cd = PDE(inode)->data;
857
858         if (rp) {
859                 spin_lock(&queue_lock);
860                 if (rp->offset) {
861                         struct cache_queue *cq;
862                         for (cq= &rp->q; &cq->list != &cd->queue;
863                              cq = list_entry(cq->list.next, struct cache_queue, list))
864                                 if (!cq->reader) {
865                                         container_of(cq, struct cache_request, q)
866                                                 ->readers--;
867                                         break;
868                                 }
869                         rp->offset = 0;
870                 }
871                 list_del(&rp->q.list);
872                 spin_unlock(&queue_lock);
873
874                 filp->private_data = NULL;
875                 kfree(rp);
876
877                 cd->last_close = get_seconds();
878                 atomic_dec(&cd->readers);
879         }
880         return 0;
881 }
882
883
884
885 static struct file_operations cache_file_operations = {
886         .owner          = THIS_MODULE,
887         .llseek         = no_llseek,
888         .read           = cache_read,
889         .write          = cache_write,
890         .poll           = cache_poll,
891         .ioctl          = cache_ioctl, /* for FIONREAD */
892         .open           = cache_open,
893         .release        = cache_release,
894 };
895
896
897 static void queue_loose(struct cache_detail *detail, struct cache_head *ch)
898 {
899         struct cache_queue *cq;
900         spin_lock(&queue_lock);
901         list_for_each_entry(cq, &detail->queue, list)
902                 if (!cq->reader) {
903                         struct cache_request *cr = container_of(cq, struct cache_request, q);
904                         if (cr->item != ch)
905                                 continue;
906                         if (cr->readers != 0)
907                                 continue;
908                         list_del(&cr->q.list);
909                         spin_unlock(&queue_lock);
910                         cache_put(cr->item, detail);
911                         kfree(cr->buf);
912                         kfree(cr);
913                         return;
914                 }
915         spin_unlock(&queue_lock);
916 }
917
918 /*
919  * Support routines for text-based upcalls.
920  * Fields are separated by spaces.
921  * Fields are either mangled to quote space, tab, newline and slosh with slosh,
922  * or hexified with a leading \x.
923  * Record is terminated with newline.
924  *
925  */
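/*
 * For example, qword_add(&bp, &len, "a b") emits "a\040b " (the space is
 * octal-quoted and a trailing field separator is appended), while
 * qword_addhex(&bp, &len, buf, 2) with buf = {0x01, 0xff} emits "\x01ff ".
 */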
926
927 void qword_add(char **bpp, int *lp, char *str)
928 {
929         char *bp = *bpp;
930         int len = *lp;
931         char c;
932
933         if (len < 0) return;
934
935         while ((c=*str++) && len)
936                 switch(c) {
937                 case ' ':
938                 case '\t':
939                 case '\n':
940                 case '\\':
941                         if (len >= 4) {
942                                 *bp++ = '\\';
943                                 *bp++ = '0' + ((c & 0300)>>6);
944                                 *bp++ = '0' + ((c & 0070)>>3);
945                                 *bp++ = '0' + ((c & 0007)>>0);
946                         }
947                         len -= 4;
948                         break;
949                 default:
950                         *bp++ = c;
951                         len--;
952                 }
953         if (c || len <1) len = -1;
954         else {
955                 *bp++ = ' ';
956                 len--;
957         }
958         *bpp = bp;
959         *lp = len;
960 }
961
962 void qword_addhex(char **bpp, int *lp, char *buf, int blen)
963 {
964         char *bp = *bpp;
965         int len = *lp;
966
967         if (len < 0) return;
968
969         if (len > 2) {
970                 *bp++ = '\\';
971                 *bp++ = 'x';
972                 len -= 2;
973                 while (blen && len >= 2) {
974                         unsigned char c = *buf++;
975                         *bp++ = '0' + ((c&0xf0)>>4) + (c>=0xa0)*('a'-'9'-1);
976                         *bp++ = '0' + (c&0x0f) + ((c&0x0f)>=0x0a)*('a'-'9'-1);
977                         len -= 2;
978                         blen--;
979                 }
980         }
981         if (blen || len<1) len = -1;
982         else {
983                 *bp++ = ' ';
984                 len--;
985         }
986         *bpp = bp;
987         *lp = len;
988 }
989
990 static void warn_no_listener(struct cache_detail *detail)
991 {
992         if (detail->last_warn != detail->last_close) {
993                 detail->last_warn = detail->last_close;
994                 if (detail->warn_no_listener)
995                         detail->warn_no_listener(detail);
996         }
997 }
998
999 /*
1000  * register an upcall request to user-space.
1001  * Each request is at most one page long.
1002  */
1003 static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h)
1004 {
1005
1006         char *buf;
1007         struct cache_request *crq;
1008         char *bp;
1009         int len;
1010
1011         if (detail->cache_request == NULL)
1012                 return -EINVAL;
1013
1014         if (atomic_read(&detail->readers) == 0 &&
1015             detail->last_close < get_seconds() - 30) {
1016                 warn_no_listener(detail);
1017                 return -EINVAL;
1018         }
1019
1020         buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
1021         if (!buf)
1022                 return -EAGAIN;
1023
1024         crq = kmalloc(sizeof (*crq), GFP_KERNEL);
1025         if (!crq) {
1026                 kfree(buf);
1027                 return -EAGAIN;
1028         }
1029
1030         bp = buf; len = PAGE_SIZE;
1031
1032         detail->cache_request(detail, h, &bp, &len);
1033
1034         if (len < 0) {
1035                 kfree(buf);
1036                 kfree(crq);
1037                 return -EAGAIN;
1038         }
1039         crq->q.reader = 0;
1040         crq->item = cache_get(h);
1041         crq->buf = buf;
1042         crq->len = PAGE_SIZE - len;
1043         crq->readers = 0;
1044         spin_lock(&queue_lock);
1045         list_add_tail(&crq->q.list, &detail->queue);
1046         spin_unlock(&queue_lock);
1047         wake_up(&queue_wait);
1048         return 0;
1049 }
1050
1051 /*
1052  * parse a message from user-space and pass it
1053  * to an appropriate cache
1054  * Messages are, like requests, separated into fields by
1055  * spaces and dequoted as \xHEXSTRING or embedded \nnn octal.
1056  *
1057  * Message is 
1058  *   reply cachename expiry key ... content....
1059  *
1060  * key and content are both parsed by cache 
1061  */
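/*
 * For example, parsing the buffer "\x6e6673 path\040b " with successive
 * qword_get calls yields "nfs" (hex-decoded) and then "path b" (the
 * octal \040 decoded back to a space).
 */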
1062
1063 #define isodigit(c) (isdigit(c) && c <= '7')
1064 int qword_get(char **bpp, char *dest, int bufsize)
1065 {
1066         /* return bytes copied, or -1 on error */
1067         char *bp = *bpp;
1068         int len = 0;
1069
1070         while (*bp == ' ') bp++;
1071
1072         if (bp[0] == '\\' && bp[1] == 'x') {
1073                 /* HEX STRING */
1074                 bp += 2;
1075                 while (isxdigit(bp[0]) && isxdigit(bp[1]) && len < bufsize) {
1076                         int byte = isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10;
1077                         bp++;
1078                         byte <<= 4;
1079                         byte |= isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10;
1080                         *dest++ = byte;
1081                         bp++;
1082                         len++;
1083                 }
1084         } else {
1085                 /* text with \nnn octal quoting */
1086                 while (*bp != ' ' && *bp != '\n' && *bp && len < bufsize-1) {
1087                         if (*bp == '\\' &&
1088                             isodigit(bp[1]) && (bp[1] <= '3') &&
1089                             isodigit(bp[2]) &&
1090                             isodigit(bp[3])) {
1091                                 int byte = (*++bp -'0');
1092                                 bp++;
1093                                 byte = (byte << 3) | (*bp++ - '0');
1094                                 byte = (byte << 3) | (*bp++ - '0');
1095                                 *dest++ = byte;
1096                                 len++;
1097                         } else {
1098                                 *dest++ = *bp++;
1099                                 len++;
1100                         }
1101                 }
1102         }
1103
1104         if (*bp != ' ' && *bp != '\n' && *bp != '\0')
1105                 return -1;
1106         while (*bp == ' ') bp++;
1107         *bpp = bp;
1108         *dest = '\0';
1109         return len;
1110 }
1111
1112
1113 /*
1114  * support /proc/sunrpc/cache/$CACHENAME/content
1115  * as a seqfile.
1116  * We call ->cache_show passing NULL for the item to
1117  * get a header, then pass each real item in the cache
1118  */
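/*
 * The seq_file position packs the hash bucket number into the upper 32
 * bits of *pos and an entry index within that bucket (offset by one, so
 * that position 0 can return the header) into the lower 32 bits;
 * c_start and c_next below maintain this encoding.
 */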
1119
1120 struct handle {
1121         struct cache_detail *cd;
1122 };
1123
1124 static void *c_start(struct seq_file *m, loff_t *pos)
1125 {
1126         loff_t n = *pos;
1127         unsigned hash, entry;
1128         struct cache_head *ch;
1129         struct cache_detail *cd = ((struct handle*)m->private)->cd;
1130         
1131
1132         read_lock(&cd->hash_lock);
1133         if (!n--)
1134                 return SEQ_START_TOKEN;
1135         hash = n >> 32;
1136         entry = n & ((1LL<<32) - 1);
1137
1138         for (ch=cd->hash_table[hash]; ch; ch=ch->next)
1139                 if (!entry--)
1140                         return ch;
1141         n &= ~((1LL<<32) - 1);
1142         do {
1143                 hash++;
1144                 n += 1LL<<32;
1145         } while(hash < cd->hash_size && 
1146                 cd->hash_table[hash]==NULL);
1147         if (hash >= cd->hash_size)
1148                 return NULL;
1149         *pos = n+1;
1150         return cd->hash_table[hash];
1151 }
1152
1153 static void *c_next(struct seq_file *m, void *p, loff_t *pos)
1154 {
1155         struct cache_head *ch = p;
1156         int hash = (*pos >> 32);
1157         struct cache_detail *cd = ((struct handle*)m->private)->cd;
1158
1159         if (p == SEQ_START_TOKEN)
1160                 hash = 0;
1161         else if (ch->next == NULL) {
1162                 hash++;
1163                 *pos += 1LL<<32;
1164         } else {
1165                 ++*pos;
1166                 return ch->next;
1167         }
1168         *pos &= ~((1LL<<32) - 1);
1169         while (hash < cd->hash_size &&
1170                cd->hash_table[hash] == NULL) {
1171                 hash++;
1172                 *pos += 1LL<<32;
1173         }
1174         if (hash >= cd->hash_size)
1175                 return NULL;
1176         ++*pos;
1177         return cd->hash_table[hash];
1178 }
1179
1180 static void c_stop(struct seq_file *m, void *p)
1181 {
1182         struct cache_detail *cd = ((struct handle*)m->private)->cd;
1183         read_unlock(&cd->hash_lock);
1184 }
1185
1186 static int c_show(struct seq_file *m, void *p)
1187 {
1188         struct cache_head *cp = p;
1189         struct cache_detail *cd = ((struct handle*)m->private)->cd;
1190
1191         if (p == SEQ_START_TOKEN)
1192                 return cd->cache_show(m, cd, NULL);
1193
1194         ifdebug(CACHE)
1195                 seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n",
1196                            cp->expiry_time, atomic_read(&cp->ref.refcount), cp->flags);
1197         cache_get(cp);
1198         if (cache_check(cd, cp, NULL))
1199                 /* cache_check does a cache_put on failure */
1200                 seq_printf(m, "# ");
1201         else
1202                 cache_put(cp, cd);
1203
1204         return cd->cache_show(m, cd, cp);
1205 }
1206
1207 static struct seq_operations cache_content_op = {
1208         .start  = c_start,
1209         .next   = c_next,
1210         .stop   = c_stop,
1211         .show   = c_show,
1212 };
1213
1214 static int content_open(struct inode *inode, struct file *file)
1215 {
1216         int res;
1217         struct handle *han;
1218         struct cache_detail *cd = PDE(inode)->data;
1219
1220         han = kmalloc(sizeof(*han), GFP_KERNEL);
1221         if (han == NULL)
1222                 return -ENOMEM;
1223
1224         han->cd = cd;
1225
1226         res = seq_open(file, &cache_content_op);
1227         if (res)
1228                 kfree(han);
1229         else
1230                 ((struct seq_file *)file->private_data)->private = han;
1231
1232         return res;
1233 }
1234 static int content_release(struct inode *inode, struct file *file)
1235 {
1236         struct seq_file *m = (struct seq_file *)file->private_data;
1237         struct handle *han = m->private;
1238         kfree(han);
1239         m->private = NULL;
1240         return seq_release(inode, file);
1241 }
1242
1243 static struct file_operations content_file_operations = {
1244         .open           = content_open,
1245         .read           = seq_read,
1246         .llseek         = seq_lseek,
1247         .release        = content_release,
1248 };
1249
1250 static ssize_t read_flush(struct file *file, char __user *buf,
1251                             size_t count, loff_t *ppos)
1252 {
1253         struct cache_detail *cd = PDE(file->f_dentry->d_inode)->data;
1254         char tbuf[20];
1255         unsigned long p = *ppos;
1256         int len;
1257
1258         sprintf(tbuf, "%lu\n", cd->flush_time);
1259         len = strlen(tbuf);
1260         if (p >= len)
1261                 return 0;
1262         len -= p;
1263         if (len > count) len = count;
1264         if (copy_to_user(buf, (void*)(tbuf+p), len))
1265                 len = -EFAULT;
1266         else
1267                 *ppos += len;
1268         return len;
1269 }
1270
1271 static ssize_t write_flush(struct file * file, const char __user * buf,
1272                              size_t count, loff_t *ppos)
1273 {
1274         struct cache_detail *cd = PDE(file->f_dentry->d_inode)->data;
1275         char tbuf[20];
1276         char *ep;
1277         long flushtime;
1278         if (*ppos || count > sizeof(tbuf)-1)
1279                 return -EINVAL;
1280         if (copy_from_user(tbuf, buf, count))
1281                 return -EFAULT;
1282         tbuf[count] = 0;
1283         flushtime = simple_strtoul(tbuf, &ep, 0);
1284         if (*ep && *ep != '\n')
1285                 return -EINVAL;
1286
1287         cd->flush_time = flushtime;
1288         cd->nextcheck = get_seconds();
1289         cache_flush();
1290
1291         *ppos += count;
1292         return count;
1293 }
1294
1295 static struct file_operations cache_flush_operations = {
1296         .open           = nonseekable_open,
1297         .read           = read_flush,
1298         .write          = write_flush,
1299 };