/*
 * xfrm_policy.c
 *
 * Changes:
 *      Mitsuru KANDA @USAGI
 *      Kazunori MIYAZAWA @USAGI
 *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *              IPv6 support
 *      Kazunori MIYAZAWA @USAGI
 *      YOSHIFUJI Hideaki
 *              Split up af-specific portion
 *      Derek Atkins <derek@ihtfp.com>          Add the post_input processor
 *
 */

#include <linux/config.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/module.h>
#include <net/xfrm.h>
#include <net/ip.h>

DECLARE_MUTEX(xfrm_cfg_sem);
EXPORT_SYMBOL(xfrm_cfg_sem);

static DEFINE_RWLOCK(xfrm_policy_lock);

struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
EXPORT_SYMBOL(xfrm_policy_list);

static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];

static kmem_cache_t *xfrm_dst_cache;

static struct work_struct xfrm_policy_gc_work;
static struct list_head xfrm_policy_gc_list =
        LIST_HEAD_INIT(xfrm_policy_gc_list);
static DEFINE_SPINLOCK(xfrm_policy_gc_lock);

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);

int xfrm_register_type(struct xfrm_type *type, unsigned short family)
{
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
        struct xfrm_type_map *typemap;
        int err = 0;

        if (unlikely(afinfo == NULL))
                return -EAFNOSUPPORT;
        typemap = afinfo->type_map;

        write_lock(&typemap->lock);
        if (likely(typemap->map[type->proto] == NULL))
                typemap->map[type->proto] = type;
        else
                err = -EEXIST;
        write_unlock(&typemap->lock);
        xfrm_policy_put_afinfo(afinfo);
        return err;
}
EXPORT_SYMBOL(xfrm_register_type);

int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
{
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
        struct xfrm_type_map *typemap;
        int err = 0;

        if (unlikely(afinfo == NULL))
                return -EAFNOSUPPORT;
        typemap = afinfo->type_map;

        write_lock(&typemap->lock);
        if (unlikely(typemap->map[type->proto] != type))
                err = -ENOENT;
        else
                typemap->map[type->proto] = NULL;
        write_unlock(&typemap->lock);
        xfrm_policy_put_afinfo(afinfo);
        return err;
}
EXPORT_SYMBOL(xfrm_unregister_type);

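/* Usage sketch (illustrative; the real callers live in protocol modules such
 * as esp4.c, and the field values below are assumptions, not taken from this
 * file): a transform protocol registers one xfrm_type per address family at
 * module init and unregisters it on exit.
 *
 *      static struct xfrm_type esp_type = {
 *              .description    = "ESP4",
 *              .owner          = THIS_MODULE,
 *              .proto          = IPPROTO_ESP,
 *              // .init_state, .destructor, .input, .output, ...
 *      };
 *
 *      static int __init esp4_init(void)
 *      {
 *              if (xfrm_register_type(&esp_type, AF_INET) < 0)
 *                      return -EAGAIN;
 *              return 0;
 *      }
 *
 *      static void __exit esp4_fini(void)
 *      {
 *              if (xfrm_unregister_type(&esp_type, AF_INET) < 0)
 *                      printk(KERN_INFO "esp close: can't remove xfrm type\n");
 *      }
 */
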
struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
{
        struct xfrm_policy_afinfo *afinfo;
        struct xfrm_type_map *typemap;
        struct xfrm_type *type;
        int modload_attempted = 0;

retry:
        afinfo = xfrm_policy_get_afinfo(family);
        if (unlikely(afinfo == NULL))
                return NULL;
        typemap = afinfo->type_map;

        read_lock(&typemap->lock);
        type = typemap->map[proto];
        if (unlikely(type && !try_module_get(type->owner)))
                type = NULL;
        read_unlock(&typemap->lock);
        if (!type && !modload_attempted) {
                xfrm_policy_put_afinfo(afinfo);
                request_module("xfrm-type-%d-%d",
                               (int) family, (int) proto);
                modload_attempted = 1;
                goto retry;
        }

        xfrm_policy_put_afinfo(afinfo);
        return type;
}
EXPORT_SYMBOL(xfrm_get_type);

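/* Autoload note (a sketch under assumptions, not something defined in this
 * file): the request_module() above asks for "xfrm-type-<family>-<proto>",
 * e.g. "xfrm-type-2-50" for ESP (protocol 50) over AF_INET (2). A protocol
 * module can make itself loadable on demand by declaring a matching alias:
 *
 *      MODULE_ALIAS("xfrm-type-2-50");  // hypothetical; must match the
 *                                       // family/proto the module registers
 */
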
int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl,
                    unsigned short family)
{
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
        int err = 0;

        if (unlikely(afinfo == NULL))
                return -EAFNOSUPPORT;

        if (likely(afinfo->dst_lookup != NULL))
                err = afinfo->dst_lookup(dst, fl);
        else
                err = -EINVAL;
        xfrm_policy_put_afinfo(afinfo);
        return err;
}
EXPORT_SYMBOL(xfrm_dst_lookup);

void xfrm_put_type(struct xfrm_type *type)
{
        module_put(type->owner);
}

static inline unsigned long make_jiffies(long secs)
{
        if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
                return MAX_SCHEDULE_TIMEOUT-1;
        else
                return secs*HZ;
}

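/* A self-contained userspace sketch of the clamp above (assumptions: HZ and
 * MAX_SCHEDULE_TIMEOUT stand in for the kernel's values; 1000 and LONG_MAX
 * are merely illustrative). It shows why the seconds value is capped before
 * the multiplication by HZ can overflow a long:
 *
 *      #include <assert.h>
 *      #include <limits.h>
 *
 *      #define HZ 1000L
 *      #define MAX_SCHEDULE_TIMEOUT LONG_MAX
 *
 *      static unsigned long make_jiffies(long secs)
 *      {
 *              if (secs >= (MAX_SCHEDULE_TIMEOUT - 1) / HZ)
 *                      return MAX_SCHEDULE_TIMEOUT - 1;
 *              return secs * HZ;
 *      }
 *
 *      int main(void)
 *      {
 *              assert(make_jiffies(5) == 5000);
 *              // near-infinite timeouts are clamped instead of overflowing
 *              assert(make_jiffies(LONG_MAX / 2) == MAX_SCHEDULE_TIMEOUT - 1);
 *              return 0;
 *      }
 */
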
static void xfrm_policy_timer(unsigned long data)
{
        struct xfrm_policy *xp = (struct xfrm_policy*)data;
        unsigned long now = (unsigned long)xtime.tv_sec;
        long next = LONG_MAX;
        int warn = 0;
        int dir;

        read_lock(&xp->lock);

        if (xp->dead)
                goto out;

        dir = xp->index & 7;

        if (xp->lft.hard_add_expires_seconds) {
                long tmo = xp->lft.hard_add_expires_seconds +
                        xp->curlft.add_time - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        if (xp->lft.hard_use_expires_seconds) {
                long tmo = xp->lft.hard_use_expires_seconds +
                        (xp->curlft.use_time ? : xp->curlft.add_time) - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        if (xp->lft.soft_add_expires_seconds) {
                long tmo = xp->lft.soft_add_expires_seconds +
                        xp->curlft.add_time - now;
                if (tmo <= 0) {
                        warn = 1;
                        tmo = XFRM_KM_TIMEOUT;
                }
                if (tmo < next)
                        next = tmo;
        }
        if (xp->lft.soft_use_expires_seconds) {
                long tmo = xp->lft.soft_use_expires_seconds +
                        (xp->curlft.use_time ? : xp->curlft.add_time) - now;
                if (tmo <= 0) {
                        warn = 1;
                        tmo = XFRM_KM_TIMEOUT;
                }
                if (tmo < next)
                        next = tmo;
        }

        if (warn)
                km_policy_expired(xp, dir, 0);
        if (next != LONG_MAX &&
            !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
                xfrm_pol_hold(xp);

out:
        read_unlock(&xp->lock);
        xfrm_pol_put(xp);
        return;

expired:
        read_unlock(&xp->lock);
        km_policy_expired(xp, dir, 1);
        xfrm_policy_delete(xp, dir);
        xfrm_pol_put(xp);
}

/* Allocate an xfrm_policy. Not used here; it is supposed to be used by
 * pfkeyv2 SPD calls.
 */

struct xfrm_policy *xfrm_policy_alloc(int gfp)
{
        struct xfrm_policy *policy;

        policy = kmalloc(sizeof(struct xfrm_policy), gfp);

        if (policy) {
                memset(policy, 0, sizeof(struct xfrm_policy));
                atomic_set(&policy->refcnt, 1);
                rwlock_init(&policy->lock);
                init_timer(&policy->timer);
                policy->timer.data = (unsigned long)policy;
                policy->timer.function = xfrm_policy_timer;
        }
        return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);

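/* Caller sketch (hedged: the PF_KEY side lives in net/key/af_key.c; the
 * snippet below is illustrative, not copied from there). An SPD add builds
 * a policy in atomic context and fills it in before insertion:
 *
 *      struct xfrm_policy *xp = xfrm_policy_alloc(GFP_ATOMIC);
 *      if (xp == NULL)
 *              return -ENOBUFS;
 *      // fill xp->selector, xp->lft, xp->action, xp->xfrm_vec[] ...
 *      // then: xfrm_policy_insert(XFRM_POLICY_OUT, xp, excl);
 */
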
/* Destroy xfrm_policy: descendant resources must have been released by this point. */

void __xfrm_policy_destroy(struct xfrm_policy *policy)
{
        if (!policy->dead)
                BUG();

        if (policy->bundles)
                BUG();

        if (del_timer(&policy->timer))
                BUG();

        kfree(policy);
}
EXPORT_SYMBOL(__xfrm_policy_destroy);

static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
{
        struct dst_entry *dst;

        while ((dst = policy->bundles) != NULL) {
                policy->bundles = dst->next;
                dst_free(dst);
        }

        if (del_timer(&policy->timer))
                atomic_dec(&policy->refcnt);

        if (atomic_read(&policy->refcnt) > 1)
                flow_cache_flush();

        xfrm_pol_put(policy);
}

static void xfrm_policy_gc_task(void *data)
{
        struct xfrm_policy *policy;
        struct list_head *entry, *tmp;
        struct list_head gc_list = LIST_HEAD_INIT(gc_list);

        spin_lock_bh(&xfrm_policy_gc_lock);
        list_splice_init(&xfrm_policy_gc_list, &gc_list);
        spin_unlock_bh(&xfrm_policy_gc_lock);

        list_for_each_safe(entry, tmp, &gc_list) {
                policy = list_entry(entry, struct xfrm_policy, list);
                xfrm_policy_gc_kill(policy);
        }
}

/* Rule must be locked. Release descendant resources, announce
 * the entry dead. The rule must already be unlinked from the lists.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
        write_lock_bh(&policy->lock);
        if (policy->dead)
                goto out;

        policy->dead = 1;

        spin_lock(&xfrm_policy_gc_lock);
        list_add(&policy->list, &xfrm_policy_gc_list);
        spin_unlock(&xfrm_policy_gc_lock);
        schedule_work(&xfrm_policy_gc_work);

out:
        write_unlock_bh(&policy->lock);
}

/* Generate a new index... KAME seems to generate indices ordered by cost,
 * at the price of completely unpredictable rule ordering. That will not do here. */
static u32 xfrm_gen_index(int dir)
{
        u32 idx;
        struct xfrm_policy *p;
        static u32 idx_generator;

        for (;;) {
                idx = (idx_generator | dir);
                idx_generator += 8;
                if (idx == 0)
                        idx = 8;
                for (p = xfrm_policy_list[dir]; p; p = p->next) {
                        if (p->index == idx)
                                break;
                }
                if (!p)
                        return idx;
        }
}

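/* Worked example (values are illustrative): the low three bits of an index
 * carry the direction, which is why xfrm_policy_timer() and
 * xfrm_policy_byid() recover it with "index & 7". With dir == 1 and the
 * generator stepping by 8, successive indices are:
 *
 *      idx = 0  | 1 = 1;       idx & 7 == 1
 *      idx = 8  | 1 = 9;       idx & 7 == 1
 *      idx = 16 | 1 = 17;      idx & 7 == 1
 *
 * The direction bits are never disturbed, and uniqueness is checked against
 * the per-direction list before an index is handed out.
 */
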
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
        struct xfrm_policy *pol, **p;
        struct xfrm_policy *delpol = NULL;
        struct xfrm_policy **newpos = NULL;

        write_lock_bh(&xfrm_policy_lock);
        for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
                if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) {
                        if (excl) {
                                write_unlock_bh(&xfrm_policy_lock);
                                return -EEXIST;
                        }
                        *p = pol->next;
                        delpol = pol;
                        if (policy->priority > pol->priority)
                                continue;
                } else if (policy->priority >= pol->priority) {
                        p = &pol->next;
                        continue;
                }
                if (!newpos)
                        newpos = p;
                if (delpol)
                        break;
                p = &pol->next;
        }
        if (newpos)
                p = newpos;
        xfrm_pol_hold(policy);
        policy->next = *p;
        *p = policy;
        atomic_inc(&flow_cache_genid);
        policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
        policy->curlft.add_time = (unsigned long)xtime.tv_sec;
        policy->curlft.use_time = 0;
        if (!mod_timer(&policy->timer, jiffies + HZ))
                xfrm_pol_hold(policy);
        write_unlock_bh(&xfrm_policy_lock);

        if (delpol) {
                xfrm_policy_kill(delpol);
        }
        return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);

struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel,
                                      int delete)
{
        struct xfrm_policy *pol, **p;

        write_lock_bh(&xfrm_policy_lock);
        for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
                if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) {
                        xfrm_pol_hold(pol);
                        if (delete)
                                *p = pol->next;
                        break;
                }
        }
        write_unlock_bh(&xfrm_policy_lock);

        if (pol && delete) {
                atomic_inc(&flow_cache_genid);
                xfrm_policy_kill(pol);
        }
        return pol;
}
EXPORT_SYMBOL(xfrm_policy_bysel);

struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
{
        struct xfrm_policy *pol, **p;

        write_lock_bh(&xfrm_policy_lock);
        for (p = &xfrm_policy_list[id & 7]; (pol=*p)!=NULL; p = &pol->next) {
                if (pol->index == id) {
                        xfrm_pol_hold(pol);
                        if (delete)
                                *p = pol->next;
                        break;
                }
        }
        write_unlock_bh(&xfrm_policy_lock);

        if (pol && delete) {
                atomic_inc(&flow_cache_genid);
                xfrm_policy_kill(pol);
        }
        return pol;
}
EXPORT_SYMBOL(xfrm_policy_byid);

void xfrm_policy_flush(void)
{
        struct xfrm_policy *xp;
        int dir;

        write_lock_bh(&xfrm_policy_lock);
        for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
                while ((xp = xfrm_policy_list[dir]) != NULL) {
                        xfrm_policy_list[dir] = xp->next;
                        write_unlock_bh(&xfrm_policy_lock);

                        xfrm_policy_kill(xp);

                        write_lock_bh(&xfrm_policy_lock);
                }
        }
        atomic_inc(&flow_cache_genid);
        write_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_policy_flush);

int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
                     void *data)
{
        struct xfrm_policy *xp;
        int dir;
        int count = 0;
        int error = 0;

        read_lock_bh(&xfrm_policy_lock);
        for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
                for (xp = xfrm_policy_list[dir]; xp; xp = xp->next)
                        count++;
        }

        if (count == 0) {
                error = -ENOENT;
                goto out;
        }

        for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
                for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) {
                        error = func(xp, dir%XFRM_POLICY_MAX, --count, data);
                        if (error)
                                goto out;
                }
        }

out:
        read_unlock_bh(&xfrm_policy_lock);
        return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);

/* Find policy to apply to this flow. */

static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
                               void **objp, atomic_t **obj_refp)
{
        struct xfrm_policy *pol;

        read_lock_bh(&xfrm_policy_lock);
        for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) {
                struct xfrm_selector *sel = &pol->selector;
                int match;

                if (pol->family != family)
                        continue;

                match = xfrm_selector_match(sel, fl, family);
                if (match) {
                        xfrm_pol_hold(pol);
                        break;
                }
        }
        read_unlock_bh(&xfrm_policy_lock);
        if ((*objp = (void *) pol) != NULL)
                *obj_refp = &pol->refcnt;
}

static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
{
        struct xfrm_policy *pol;

        read_lock_bh(&xfrm_policy_lock);
        if ((pol = sk->sk_policy[dir]) != NULL) {
                int match = xfrm_selector_match(&pol->selector, fl,
                                                sk->sk_family);
                if (match)
                        xfrm_pol_hold(pol);
                else
                        pol = NULL;
        }
        read_unlock_bh(&xfrm_policy_lock);
        return pol;
}

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
        pol->next = xfrm_policy_list[dir];
        xfrm_policy_list[dir] = pol;
        xfrm_pol_hold(pol);
}

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
                                                int dir)
{
        struct xfrm_policy **polp;

        for (polp = &xfrm_policy_list[dir];
             *polp != NULL; polp = &(*polp)->next) {
                if (*polp == pol) {
                        *polp = pol->next;
                        return pol;
                }
        }
        return NULL;
}

void xfrm_policy_delete(struct xfrm_policy *pol, int dir)
{
        write_lock_bh(&xfrm_policy_lock);
        pol = __xfrm_policy_unlink(pol, dir);
        write_unlock_bh(&xfrm_policy_lock);
        if (pol) {
                if (dir < XFRM_POLICY_MAX)
                        atomic_inc(&flow_cache_genid);
                xfrm_policy_kill(pol);
        }
}

int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
        struct xfrm_policy *old_pol;

        write_lock_bh(&xfrm_policy_lock);
        old_pol = sk->sk_policy[dir];
        sk->sk_policy[dir] = pol;
        if (pol) {
                pol->curlft.add_time = (unsigned long)xtime.tv_sec;
                pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
                __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
        }
        if (old_pol)
                __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
        write_unlock_bh(&xfrm_policy_lock);

        if (old_pol) {
                xfrm_policy_kill(old_pol);
        }
        return 0;
}

static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
{
        struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);

        if (newp) {
                newp->selector = old->selector;
                newp->lft = old->lft;
                newp->curlft = old->curlft;
                newp->action = old->action;
                newp->flags = old->flags;
                newp->xfrm_nr = old->xfrm_nr;
                newp->index = old->index;
                memcpy(newp->xfrm_vec, old->xfrm_vec,
                       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
                write_lock_bh(&xfrm_policy_lock);
                __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
                write_unlock_bh(&xfrm_policy_lock);
                xfrm_pol_put(newp);
        }
        return newp;
}

int __xfrm_sk_clone_policy(struct sock *sk)
{
        struct xfrm_policy *p0 = sk->sk_policy[0],
                           *p1 = sk->sk_policy[1];

        sk->sk_policy[0] = sk->sk_policy[1] = NULL;
        if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
                return -ENOMEM;
        if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
                return -ENOMEM;
        return 0;
}

/* Resolve list of templates for the flow, given policy. */

static int
xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
                  struct xfrm_state **xfrm,
                  unsigned short family)
{
        int nx;
        int i, error;
        xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
        xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);

        for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
                struct xfrm_state *x;
                xfrm_address_t *remote = daddr;
                xfrm_address_t *local  = saddr;
                struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

                if (tmpl->mode) {
                        remote = &tmpl->id.daddr;
                        local = &tmpl->saddr;
                }

                x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

                if (x && x->km.state == XFRM_STATE_VALID) {
                        xfrm[nx++] = x;
                        daddr = remote;
                        saddr = local;
                        continue;
                }
                if (x) {
                        error = (x->km.state == XFRM_STATE_ERROR ?
                                 -EINVAL : -EAGAIN);
                        xfrm_state_put(x);
                }

                if (!tmpl->optional)
                        goto fail;
        }
        return nx;

fail:
        for (nx--; nx>=0; nx--)
                xfrm_state_put(xfrm[nx]);
        return error;
}

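/* Resolution sketch (addresses invented for illustration): for a tunnel-mode
 * template (tmpl->mode != 0) the lookup keys switch from the flow's own
 * addresses to the template's tunnel endpoints, and those endpoints then
 * become the addresses that any following template chains from.
 *
 *      flow:    192.0.2.1 -> 198.51.100.1
 *      tmpl[0]: ESP, tunnel, id.daddr 203.0.113.2, saddr 203.0.113.1
 *
 *      i == 0: remote/local = 203.0.113.2/203.0.113.1 (template endpoints);
 *              once a VALID state is found, daddr/saddr point at them, so a
 *              second template would be resolved against the tunnel header.
 *
 * An optional template that fails to resolve is simply skipped; a mandatory
 * one makes the whole resolution fail and drops the states gathered so far.
 */
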
/* Check that the bundle accepts the flow and that its components are
 * still valid.
 */

static struct dst_entry *
xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
{
        struct dst_entry *x;
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
        if (unlikely(afinfo == NULL))
                return ERR_PTR(-EINVAL);
        x = afinfo->find_bundle(fl, policy);
        xfrm_policy_put_afinfo(afinfo);
        return x;
}

/* Allocate a chain of dst_entry's, attach known xfrm's, calculate
 * all the metrics... In short, bundle a bundle.
 */

static int
xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
                   struct flowi *fl, struct dst_entry **dst_p,
                   unsigned short family)
{
        int err;
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
        if (unlikely(afinfo == NULL))
                return -EINVAL;
        err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p);
        xfrm_policy_put_afinfo(afinfo);
        return err;
}

static inline int policy_to_flow_dir(int dir)
{
        if (XFRM_POLICY_IN == FLOW_DIR_IN &&
            XFRM_POLICY_OUT == FLOW_DIR_OUT &&
            XFRM_POLICY_FWD == FLOW_DIR_FWD)
                return dir;
        switch (dir) {
        default:
        case XFRM_POLICY_IN:
                return FLOW_DIR_IN;
        case XFRM_POLICY_OUT:
                return FLOW_DIR_OUT;
        case XFRM_POLICY_FWD:
                return FLOW_DIR_FWD;
        }
}

static int stale_bundle(struct dst_entry *dst);

/* Main function: finds/creates a bundle for a given flow.
 *
 * At the moment we eat a raw IP route, mostly to speed up lookups
 * on interfaces with IPsec disabled.
 */
int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
                struct sock *sk, int flags)
{
        struct xfrm_policy *policy;
        struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
        struct dst_entry *dst, *dst_orig = *dst_p;
        int nx = 0;
        int err;
        u32 genid;
        u16 family = dst_orig->ops->family;
restart:
        genid = atomic_read(&flow_cache_genid);
        policy = NULL;
        if (sk && sk->sk_policy[1])
                policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);

        if (!policy) {
                /* To accelerate a bit...  */
                if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
                        return 0;

                policy = flow_cache_lookup(fl, family,
                                           policy_to_flow_dir(XFRM_POLICY_OUT),
                                           xfrm_policy_lookup);
        }

        if (!policy)
                return 0;

        policy->curlft.use_time = (unsigned long)xtime.tv_sec;

        switch (policy->action) {
        case XFRM_POLICY_BLOCK:
                /* Prohibit the flow */
                xfrm_pol_put(policy);
                return -EPERM;

        case XFRM_POLICY_ALLOW:
                if (policy->xfrm_nr == 0) {
                        /* Flow passes untransformed. */
                        xfrm_pol_put(policy);
                        return 0;
                }

                /* Try to find a matching bundle.
                 *
                 * LATER: help from the flow cache. It is optional; this
                 * is required only for output policy.
                 */
                dst = xfrm_find_bundle(fl, policy, family);
                if (IS_ERR(dst)) {
                        xfrm_pol_put(policy);
                        return PTR_ERR(dst);
                }

                if (dst)
                        break;

                nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);

                if (unlikely(nx<0)) {
                        err = nx;
                        if (err == -EAGAIN && flags) {
                                DECLARE_WAITQUEUE(wait, current);

                                add_wait_queue(&km_waitq, &wait);
                                set_current_state(TASK_INTERRUPTIBLE);
                                schedule();
                                set_current_state(TASK_RUNNING);
                                remove_wait_queue(&km_waitq, &wait);

                                nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);

                                if (nx == -EAGAIN && signal_pending(current)) {
                                        err = -ERESTART;
                                        goto error;
                                }
                                if (nx == -EAGAIN ||
                                    genid != atomic_read(&flow_cache_genid)) {
                                        xfrm_pol_put(policy);
                                        goto restart;
                                }
                                err = nx;
                        }
                        if (err < 0)
                                goto error;
                }
                if (nx == 0) {
                        /* Flow passes untransformed. */
                        xfrm_pol_put(policy);
                        return 0;
                }

                dst = dst_orig;
                err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);

                if (unlikely(err)) {
                        int i;
                        for (i=0; i<nx; i++)
                                xfrm_state_put(xfrm[i]);
                        goto error;
                }

                write_lock_bh(&policy->lock);
                if (unlikely(policy->dead || stale_bundle(dst))) {
                        /* Wow! While we were resolving, this
                         * policy has gone away. Retry. It is not paranoia:
                         * we just cannot enlist a new bundle on a dead object,
                         * and we cannot enlist stale bundles either.
                         */
                        write_unlock_bh(&policy->lock);

                        xfrm_pol_put(policy);
                        if (dst)
                                dst_free(dst);
                        goto restart;
                }
                dst->next = policy->bundles;
                policy->bundles = dst;
                dst_hold(dst);
                write_unlock_bh(&policy->lock);
        }
        *dst_p = dst;
        dst_release(dst_orig);
        xfrm_pol_put(policy);
        return 0;

error:
        dst_release(dst_orig);
        xfrm_pol_put(policy);
        *dst_p = NULL;
        return err;
}
EXPORT_SYMBOL(xfrm_lookup);

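/* Caller sketch (hedged: the actual call sites are in the routing code, e.g.
 * ip_route_output_flow(); this is illustrative, with a hypothetical rt).
 * Callers hand in the plain route they already hold, and on success *dst_p
 * is replaced by the bundle, or left alone when no policy applies:
 *
 *      struct dst_entry *dst = &rt->u.dst;     // plain IP route
 *      int err = xfrm_lookup(&dst, &fl, sk, 0);
 *      if (err)
 *              return err;     // e.g. -EPERM for a BLOCK policy
 *      // dst now points at the xfrm bundle (or the original route)
 */
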
/* When skb is transformed back to its "native" form, we have to
 * check policy restrictions. At the moment we do this in a maximally
 * stupid way. Shame on me. :-) Of course, connected sockets must
 * have their policy cached on them.
 */

static inline int
xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
              unsigned short family)
{
        if (xfrm_state_kern(x))
                return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, family);
        return  x->id.proto == tmpl->id.proto &&
                (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
                (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
                x->props.mode == tmpl->mode &&
                (tmpl->aalgos & (1<<x->props.aalgo)) &&
                !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family));
}

static inline int
xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
               unsigned short family)
{
        int idx = start;

        if (tmpl->optional) {
                if (!tmpl->mode)
                        return start;
        } else
                start = -1;
        for (; idx < sp->len; idx++) {
                if (xfrm_state_ok(tmpl, sp->x[idx].xvec, family))
                        return ++idx;
                if (sp->x[idx].xvec->props.mode)
                        break;
        }
        return start;
}

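/* Matching sketch (worked example, states invented for illustration): the
 * caller in __xfrm_policy_check() walks templates last-to-first, feeding the
 * cursor k returned here back in. For a policy with one mandatory
 * transport-mode ESP template and a sec_path of exactly one matching ESP
 * state:
 *
 *      i == 0, k == 0: xfrm_state_ok() matches sp->x[0], return 1
 *      secpath_has_tunnel(sp, 1) == 0  -> packet is accepted
 *
 * A mandatory template that matches nothing makes this return -1, which the
 * caller treats as a reject.
 */
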
static int
_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
{
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);

        if (unlikely(afinfo == NULL))
                return -EAFNOSUPPORT;

        afinfo->decode_session(skb, fl);
        xfrm_policy_put_afinfo(afinfo);
        return 0;
}

static inline int secpath_has_tunnel(struct sec_path *sp, int k)
{
        for (; k < sp->len; k++) {
                if (sp->x[k].xvec->props.mode)
                        return 1;
        }

        return 0;
}

int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
                        unsigned short family)
{
        struct xfrm_policy *pol;
        struct flowi fl;

        if (_decode_session(skb, &fl, family) < 0)
                return 0;

        /* First, check the SAs used against their selectors. */
        if (skb->sp) {
                int i;

                for (i=skb->sp->len-1; i>=0; i--) {
                        struct sec_decap_state *xvec = &(skb->sp->x[i]);
                        if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family))
                                return 0;

                        /* If there is a post_input processor, try running it */
                        if (xvec->xvec->type->post_input &&
                            (xvec->xvec->type->post_input)(xvec->xvec,
                                                           &(xvec->decap),
                                                           skb) != 0)
                                return 0;
                }
        }

        pol = NULL;
        if (sk && sk->sk_policy[dir])
                pol = xfrm_sk_policy_lookup(sk, dir, &fl);

        if (!pol)
                pol = flow_cache_lookup(&fl, family,
                                        policy_to_flow_dir(dir),
                                        xfrm_policy_lookup);

        if (!pol)
                return !skb->sp || !secpath_has_tunnel(skb->sp, 0);

        pol->curlft.use_time = (unsigned long)xtime.tv_sec;

        if (pol->action == XFRM_POLICY_ALLOW) {
                struct sec_path *sp;
                static struct sec_path dummy;
                int i, k;

                if ((sp = skb->sp) == NULL)
                        sp = &dummy;

                /* For each tunnel xfrm, find the first matching tmpl.
                 * For each tmpl before that, find the corresponding xfrm.
                 * Order is _important_. Later we will implement
                 * some barriers, but at the moment barriers
                 * are implied between every two transformations.
                 */
                for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) {
                        k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family);
                        if (k < 0)
                                goto reject;
                }

                if (secpath_has_tunnel(sp, k))
                        goto reject;

                xfrm_pol_put(pol);
                return 1;
        }

reject:
        xfrm_pol_put(pol);
        return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);

int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
        struct flowi fl;

        if (_decode_session(skb, &fl, family) < 0)
                return 0;

        return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
}
EXPORT_SYMBOL(__xfrm_route_forward);

/* Optimize later using cookies and generation ids. */

static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
        if (!stale_bundle(dst))
                return dst;

        dst_release(dst);
        return NULL;
}

static int stale_bundle(struct dst_entry *dst)
{
        struct dst_entry *child = dst;

        while (child) {
                if (child->obsolete > 0 ||
                    (child->dev && !netif_running(child->dev)) ||
                    (child->xfrm && child->xfrm->km.state != XFRM_STATE_VALID)) {
                        return 1;
                }
                child = child->child;
        }

        return 0;
}

static void xfrm_dst_destroy(struct dst_entry *dst)
{
        if (!dst->xfrm)
                return;
        xfrm_state_put(dst->xfrm);
        dst->xfrm = NULL;
}

static void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
                            int unregister)
{
        if (!unregister)
                return;

        while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
                dst->dev = &loopback_dev;
                dev_hold(&loopback_dev);
                dev_put(dev);
        }
}

static void xfrm_link_failure(struct sk_buff *skb)
{
        /* Impossible. Such a dst must be popped before it reaches the point of failure. */
        return;
}

static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
{
        if (dst) {
                if (dst->obsolete) {
                        dst_release(dst);
                        dst = NULL;
                }
        }
        return dst;
}

static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
{
        int i;
        struct xfrm_policy *pol;
        struct dst_entry *dst, **dstp, *gc_list = NULL;

        read_lock_bh(&xfrm_policy_lock);
        for (i=0; i<2*XFRM_POLICY_MAX; i++) {
                for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
                        write_lock(&pol->lock);
                        dstp = &pol->bundles;
                        while ((dst=*dstp) != NULL) {
                                if (func(dst)) {
                                        *dstp = dst->next;
                                        dst->next = gc_list;
                                        gc_list = dst;
                                } else {
                                        dstp = &dst->next;
                                }
                        }
                        write_unlock(&pol->lock);
                }
        }
        read_unlock_bh(&xfrm_policy_lock);

        while (gc_list) {
                dst = gc_list;
                gc_list = dst->next;
                dst_free(dst);
        }
}

static int unused_bundle(struct dst_entry *dst)
{
        return !atomic_read(&dst->__refcnt);
}

static void __xfrm_garbage_collect(void)
{
        xfrm_prune_bundles(unused_bundle);
}

int xfrm_flush_bundles(void)
{
        xfrm_prune_bundles(stale_bundle);
        return 0;
}

/* Well... that's _the_ task. We need to scan through the transformation
 * list and figure out what MSS TCP should generate so that the
 * final datagram fits the MTU. Mama mia... :-)
 *
 * Apparently, some easy way exists, but we used to choose the most
 * bizarre ones. :-) So, raising Kalashnikov... tra-ta-ta.
 *
 * Consider this function as something like dark humour. :-)
 */
static int xfrm_get_mss(struct dst_entry *dst, u32 mtu)
{
        int res = mtu - dst->header_len;

        for (;;) {
                struct dst_entry *d = dst;
                int m = res;

                do {
                        struct xfrm_state *x = d->xfrm;
                        if (x) {
                                spin_lock_bh(&x->lock);
                                if (x->km.state == XFRM_STATE_VALID &&
                                    x->type && x->type->get_max_size)
                                        m = x->type->get_max_size(d->xfrm, m);
                                else
                                        m += x->props.header_len;
                                spin_unlock_bh(&x->lock);
                        }
                } while ((d = d->child) != NULL);

                if (m <= mtu)
                        break;
                res -= (m - mtu);
                if (res < 88)
                        return mtu;
        }

        return res + dst->header_len;
}

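/* Worked example (numbers invented for illustration): suppose mtu == 1500,
 * dst->header_len == 0, and a single transform whose get_max_size() adds a
 * flat 40 bytes of overhead.
 *
 *      pass 1: res = 1500, m = 1540 > 1500, so res -= 40 -> 1460
 *      pass 2: m = 1460 + 40 = 1500 <= 1500, done; return 1460
 *
 * The payload budget converges on a value whose transformed size fits the
 * MTU; if the budget ever drops below 88 bytes, the function gives up and
 * returns the raw MTU.
 */
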
int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
        int err = 0;
        if (unlikely(afinfo == NULL))
                return -EINVAL;
        if (unlikely(afinfo->family >= NPROTO))
                return -EAFNOSUPPORT;
        write_lock(&xfrm_policy_afinfo_lock);
        if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
                err = -ENOBUFS;
        else {
                struct dst_ops *dst_ops = afinfo->dst_ops;
                if (likely(dst_ops->kmem_cachep == NULL))
                        dst_ops->kmem_cachep = xfrm_dst_cache;
                if (likely(dst_ops->check == NULL))
                        dst_ops->check = xfrm_dst_check;
                if (likely(dst_ops->destroy == NULL))
                        dst_ops->destroy = xfrm_dst_destroy;
                if (likely(dst_ops->ifdown == NULL))
                        dst_ops->ifdown = xfrm_dst_ifdown;
                if (likely(dst_ops->negative_advice == NULL))
                        dst_ops->negative_advice = xfrm_negative_advice;
                if (likely(dst_ops->link_failure == NULL))
                        dst_ops->link_failure = xfrm_link_failure;
                if (likely(dst_ops->get_mss == NULL))
                        dst_ops->get_mss = xfrm_get_mss;
                if (likely(afinfo->garbage_collect == NULL))
                        afinfo->garbage_collect = __xfrm_garbage_collect;
                xfrm_policy_afinfo[afinfo->family] = afinfo;
        }
        write_unlock(&xfrm_policy_afinfo_lock);
        return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);

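/* Provider sketch (hedged: the real per-family providers are files like
 * net/ipv4/xfrm4_policy.c; the fields below are an illustrative subset and
 * the names are assumptions). Each address family fills in an
 * xfrm_policy_afinfo with its own dst_ops and hooks, then registers it once
 * at init:
 *
 *      static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 *              .family         = AF_INET,
 *              .lock           = RW_LOCK_UNLOCKED,
 *              .type_map       = &xfrm4_type_map,      // hypothetical names
 *              .dst_ops        = &xfrm4_dst_ops,
 *              .dst_lookup     = xfrm4_dst_lookup,
 *              .find_bundle    = __xfrm4_find_bundle,
 *              .decode_session = _decode_session4,
 *              .bundle_create  = __xfrm4_bundle_create,
 *      };
 *
 *      xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
 */
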
int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
{
        int err = 0;
        if (unlikely(afinfo == NULL))
                return -EINVAL;
        if (unlikely(afinfo->family >= NPROTO))
                return -EAFNOSUPPORT;
        write_lock(&xfrm_policy_afinfo_lock);
        if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
                if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
                        err = -EINVAL;
                else {
                        struct dst_ops *dst_ops = afinfo->dst_ops;
                        xfrm_policy_afinfo[afinfo->family] = NULL;
                        dst_ops->kmem_cachep = NULL;
                        dst_ops->check = NULL;
                        dst_ops->destroy = NULL;
                        dst_ops->ifdown = NULL;
                        dst_ops->negative_advice = NULL;
                        dst_ops->link_failure = NULL;
                        dst_ops->get_mss = NULL;
                        afinfo->garbage_collect = NULL;
                }
        }
        write_unlock(&xfrm_policy_afinfo_lock);
        return err;
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
        struct xfrm_policy_afinfo *afinfo;
        if (unlikely(family >= NPROTO))
                return NULL;
        read_lock(&xfrm_policy_afinfo_lock);
        afinfo = xfrm_policy_afinfo[family];
        if (likely(afinfo != NULL))
                read_lock(&afinfo->lock);
        read_unlock(&xfrm_policy_afinfo_lock);
        return afinfo;
}

static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
        if (unlikely(afinfo == NULL))
                return;
        read_unlock(&afinfo->lock);
}

static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
        switch (event) {
        case NETDEV_DOWN:
                xfrm_flush_bundles();
        }
        return NOTIFY_DONE;
}

static struct notifier_block xfrm_dev_notifier = {
        xfrm_dev_event,
        NULL,
        0
};

static void __init xfrm_policy_init(void)
{
        xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
                                           sizeof(struct xfrm_dst),
                                           0, SLAB_HWCACHE_ALIGN,
                                           NULL, NULL);
        if (!xfrm_dst_cache)
                panic("XFRM: failed to allocate xfrm_dst_cache\n");

        INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
        register_netdevice_notifier(&xfrm_dev_notifier);
}

void __init xfrm_init(void)
{
        xfrm_state_init();
        xfrm_policy_init();
        xfrm_input_init();
}