/*
 * xfrm_policy.c
 *
 * Changes:
 *      Mitsuru KANDA @USAGI
 *      Kazunori MIYAZAWA @USAGI
 *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *              IPv6 support
 *      Kazunori MIYAZAWA @USAGI
 *      YOSHIFUJI Hideaki
 *              Split up af-specific portion
 *      Derek Atkins <derek@ihtfp.com>          Add the post_input processor
 *
 */

#include <linux/config.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <net/xfrm.h>
#include <net/ip.h>

DECLARE_MUTEX(xfrm_cfg_sem);

static rwlock_t xfrm_policy_lock = RW_LOCK_UNLOCKED;

struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];

static rwlock_t xfrm_policy_afinfo_lock = RW_LOCK_UNLOCKED;
static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];

kmem_cache_t *xfrm_dst_cache;

static struct work_struct xfrm_policy_gc_work;
static struct list_head xfrm_policy_gc_list =
        LIST_HEAD_INIT(xfrm_policy_gc_list);
static spinlock_t xfrm_policy_gc_lock = SPIN_LOCK_UNLOCKED;

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);

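/* Register an xfrm_type (an AH/ESP/IPcomp implementation) in the
 * per-family type map, keyed by its IP protocol number.  Fails with
 * -EEXIST if the slot is already taken.
 */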
int xfrm_register_type(struct xfrm_type *type, unsigned short family)
{
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
        struct xfrm_type_map *typemap;
        int err = 0;

        if (unlikely(afinfo == NULL))
                return -EAFNOSUPPORT;
        typemap = afinfo->type_map;

        write_lock(&typemap->lock);
        if (likely(typemap->map[type->proto] == NULL))
                typemap->map[type->proto] = type;
        else
                err = -EEXIST;
        write_unlock(&typemap->lock);
        xfrm_policy_put_afinfo(afinfo);
        return err;
}

int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
{
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
        struct xfrm_type_map *typemap;
        int err = 0;

        if (unlikely(afinfo == NULL))
                return -EAFNOSUPPORT;
        typemap = afinfo->type_map;

        write_lock(&typemap->lock);
        if (unlikely(typemap->map[type->proto] != type))
                err = -ENOENT;
        else
                typemap->map[type->proto] = NULL;
        write_unlock(&typemap->lock);
        xfrm_policy_put_afinfo(afinfo);
        return err;
}

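/* Look up the xfrm_type for @proto in @family, taking a reference on its
 * owning module.  If no type is registered, try once to load it via
 * request_module() ("xfrm-type-<family>-<proto>", e.g. "xfrm-type-2-50"
 * for IPv4 ESP) and retry.  Callers release with xfrm_put_type().
 */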
struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
{
        struct xfrm_policy_afinfo *afinfo;
        struct xfrm_type_map *typemap;
        struct xfrm_type *type;
        int modload_attempted = 0;

retry:
        afinfo = xfrm_policy_get_afinfo(family);
        if (unlikely(afinfo == NULL))
                return NULL;
        typemap = afinfo->type_map;

        read_lock(&typemap->lock);
        type = typemap->map[proto];
        if (unlikely(type && !try_module_get(type->owner)))
                type = NULL;
        read_unlock(&typemap->lock);
        if (!type && !modload_attempted) {
                xfrm_policy_put_afinfo(afinfo);
                request_module("xfrm-type-%d-%d",
                               (int) family, (int) proto);
                modload_attempted = 1;
                goto retry;
        }

        xfrm_policy_put_afinfo(afinfo);
        return type;
}

int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl,
                    unsigned short family)
{
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
        int err = 0;

        if (unlikely(afinfo == NULL))
                return -EAFNOSUPPORT;

        if (likely(afinfo->dst_lookup != NULL))
                err = afinfo->dst_lookup(dst, fl);
        else
                err = -EINVAL;
        xfrm_policy_put_afinfo(afinfo);
        return err;
}

void xfrm_put_type(struct xfrm_type *type)
{
        module_put(type->owner);
}

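/* Convert a relative timeout in seconds to jiffies, clamped so that the
 * result stays below MAX_SCHEDULE_TIMEOUT.
 */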
static inline unsigned long make_jiffies(long secs)
{
        if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
                return MAX_SCHEDULE_TIMEOUT-1;
        else
                return secs*HZ;
}

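/* Per-policy lifetime timer.  Sends soft-expiry warnings to the key
 * managers via km_policy_expired() and deletes the policy outright once
 * a hard add-time or use-time limit has passed.
 */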
static void xfrm_policy_timer(unsigned long data)
{
        struct xfrm_policy *xp = (struct xfrm_policy*)data;
        unsigned long now = (unsigned long)xtime.tv_sec;
        long next = LONG_MAX;
        int warn = 0;
        int dir;

        read_lock(&xp->lock);

        if (xp->dead)
                goto out;

        dir = xp->index & 7;

        if (xp->lft.hard_add_expires_seconds) {
                long tmo = xp->lft.hard_add_expires_seconds +
                        xp->curlft.add_time - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        if (xp->lft.hard_use_expires_seconds) {
                long tmo = xp->lft.hard_use_expires_seconds +
                        (xp->curlft.use_time ? : xp->curlft.add_time) - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        if (xp->lft.soft_add_expires_seconds) {
                long tmo = xp->lft.soft_add_expires_seconds +
                        xp->curlft.add_time - now;
                if (tmo <= 0) {
                        warn = 1;
                        tmo = XFRM_KM_TIMEOUT;
                }
                if (tmo < next)
                        next = tmo;
        }
        if (xp->lft.soft_use_expires_seconds) {
                long tmo = xp->lft.soft_use_expires_seconds +
                        (xp->curlft.use_time ? : xp->curlft.add_time) - now;
                if (tmo <= 0) {
                        warn = 1;
                        tmo = XFRM_KM_TIMEOUT;
                }
                if (tmo < next)
                        next = tmo;
        }

        if (warn)
                km_policy_expired(xp, dir, 0);
        if (next != LONG_MAX &&
            !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
                xfrm_pol_hold(xp);

out:
        read_unlock(&xp->lock);
        xfrm_pol_put(xp);
        return;

expired:
        read_unlock(&xp->lock);
        km_policy_expired(xp, dir, 1);
        xfrm_policy_delete(xp, dir);
        xfrm_pol_put(xp);
}


/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
 * SPD calls.
 */

struct xfrm_policy *xfrm_policy_alloc(int gfp)
{
        struct xfrm_policy *policy;

        policy = kmalloc(sizeof(struct xfrm_policy), gfp);

        if (policy) {
                memset(policy, 0, sizeof(struct xfrm_policy));
                atomic_set(&policy->refcnt, 1);
                policy->lock = RW_LOCK_UNLOCKED;
                init_timer(&policy->timer);
                policy->timer.data = (unsigned long)policy;
                policy->timer.function = xfrm_policy_timer;
        }
        return policy;
}

/* Destroy xfrm_policy: descendant resources must have been released by
 * this point. */

void __xfrm_policy_destroy(struct xfrm_policy *policy)
{
        if (!policy->dead)
                BUG();

        if (policy->bundles)
                BUG();

        if (del_timer(&policy->timer))
                BUG();

        kfree(policy);
}

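/* Garbage collection of dead policies: release cached bundles and the
 * timer reference, flush the flow cache if other references remain, and
 * drop the final reference.  Runs from the xfrm_policy_gc_work workqueue.
 */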
static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
{
        struct dst_entry *dst;

        while ((dst = policy->bundles) != NULL) {
                policy->bundles = dst->next;
                dst_free(dst);
        }

        if (del_timer(&policy->timer))
                atomic_dec(&policy->refcnt);

        if (atomic_read(&policy->refcnt) > 1)
                flow_cache_flush();

        xfrm_pol_put(policy);
}

static void xfrm_policy_gc_task(void *data)
{
        struct xfrm_policy *policy;
        struct list_head *entry, *tmp;
        struct list_head gc_list = LIST_HEAD_INIT(gc_list);

        spin_lock_bh(&xfrm_policy_gc_lock);
        list_splice_init(&xfrm_policy_gc_list, &gc_list);
        spin_unlock_bh(&xfrm_policy_gc_lock);

        list_for_each_safe(entry, tmp, &gc_list) {
                policy = list_entry(entry, struct xfrm_policy, list);
                xfrm_policy_gc_kill(policy);
        }
}

/* Announce the entry dead and queue it so the GC task can release its
 * descendant resources. The rule must already be unlinked from the lists.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
        write_lock_bh(&policy->lock);
        if (policy->dead)
                goto out;

        policy->dead = 1;

        spin_lock(&xfrm_policy_gc_lock);
        list_add(&policy->list, &xfrm_policy_gc_list);
        spin_unlock(&xfrm_policy_gc_lock);
        schedule_work(&xfrm_policy_gc_work);

out:
        write_unlock_bh(&policy->lock);
}

/* Generate a new index... KAME seems to generate them ordered by cost,
 * at the price of completely unpredictable rule ordering. That will not
 * do here. */
static u32 xfrm_gen_index(int dir)
{
        u32 idx;
        struct xfrm_policy *p;
        static u32 idx_generator;

        for (;;) {
                idx = (idx_generator | dir);
                idx_generator += 8;
                if (idx == 0)
                        idx = 8;
                for (p = xfrm_policy_list[dir]; p; p = p->next) {
                        if (p->index == idx)
                                break;
                }
                if (!p)
                        return idx;
        }
}

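/* Insert a policy into the list for @dir, ordered by priority.  If a
 * policy with an identical selector already exists it is replaced (and
 * its index inherited), or -EEXIST is returned when @excl is set; the
 * replaced entry is killed after the list lock is dropped.
 */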
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
        struct xfrm_policy *pol, **p;
        struct xfrm_policy *delpol = NULL;
        struct xfrm_policy **newpos = NULL;

        write_lock_bh(&xfrm_policy_lock);
        for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
                if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) {
                        if (excl) {
                                write_unlock_bh(&xfrm_policy_lock);
                                return -EEXIST;
                        }
                        *p = pol->next;
                        delpol = pol;
                        if (policy->priority > pol->priority)
                                continue;
                } else if (policy->priority >= pol->priority) {
                        p = &pol->next;
                        continue;
                }
                if (!newpos)
                        newpos = p;
                if (delpol)
                        break;
                p = &pol->next;
        }
        if (newpos)
                p = newpos;
        xfrm_pol_hold(policy);
        policy->next = *p;
        *p = policy;
        atomic_inc(&flow_cache_genid);
        policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
        policy->curlft.add_time = (unsigned long)xtime.tv_sec;
        policy->curlft.use_time = 0;
        if (!mod_timer(&policy->timer, jiffies + HZ))
                xfrm_pol_hold(policy);
        write_unlock_bh(&xfrm_policy_lock);

        if (delpol) {
                xfrm_policy_kill(delpol);
        }
        return 0;
}

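/* Find a policy by exact selector match; take a reference and, when
 * @delete is set, unlink it from the list and kill it.  Returns the
 * referenced policy, or NULL if there is no match.
 */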
struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel,
                                      int delete)
{
        struct xfrm_policy *pol, **p;

        write_lock_bh(&xfrm_policy_lock);
        for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
                if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) {
                        xfrm_pol_hold(pol);
                        if (delete)
                                *p = pol->next;
                        break;
                }
        }
        write_unlock_bh(&xfrm_policy_lock);

        if (pol && delete) {
                atomic_inc(&flow_cache_genid);
                xfrm_policy_kill(pol);
        }
        return pol;
}

struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
{
        struct xfrm_policy *pol, **p;

        write_lock_bh(&xfrm_policy_lock);
        for (p = &xfrm_policy_list[id & 7]; (pol=*p)!=NULL; p = &pol->next) {
                if (pol->index == id) {
                        xfrm_pol_hold(pol);
                        if (delete)
                                *p = pol->next;
                        break;
                }
        }
        write_unlock_bh(&xfrm_policy_lock);

        if (pol && delete) {
                atomic_inc(&flow_cache_genid);
                xfrm_policy_kill(pol);
        }
        return pol;
}

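/* Remove and kill every policy on the main (non-socket) policy lists,
 * dropping the list lock around each xfrm_policy_kill() call.
 */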
void xfrm_policy_flush(void)
{
        struct xfrm_policy *xp;
        int dir;

        write_lock_bh(&xfrm_policy_lock);
        for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
                while ((xp = xfrm_policy_list[dir]) != NULL) {
                        xfrm_policy_list[dir] = xp->next;
                        write_unlock_bh(&xfrm_policy_lock);

                        xfrm_policy_kill(xp);

                        write_lock_bh(&xfrm_policy_lock);
                }
        }
        atomic_inc(&flow_cache_genid);
        write_unlock_bh(&xfrm_policy_lock);
}

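/* Iterate over all policies, both main and per-socket lists, invoking
 * @func with each policy, its direction and a countdown index.  Returns
 * -ENOENT when no policies exist, otherwise the first nonzero value
 * returned by @func, or 0 on success.
 */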
int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
                     void *data)
{
        struct xfrm_policy *xp;
        int dir;
        int count = 0;
        int error = 0;

        read_lock_bh(&xfrm_policy_lock);
        for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
                for (xp = xfrm_policy_list[dir]; xp; xp = xp->next)
                        count++;
        }

        if (count == 0) {
                error = -ENOENT;
                goto out;
        }

        for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
                for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) {
                        error = func(xp, dir%XFRM_POLICY_MAX, --count, data);
                        if (error)
                                goto out;
                }
        }

out:
        read_unlock_bh(&xfrm_policy_lock);
        return error;
}


/* Find policy to apply to this flow. */

static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
                               void **objp, atomic_t **obj_refp)
{
        struct xfrm_policy *pol;

        read_lock_bh(&xfrm_policy_lock);
        for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) {
                struct xfrm_selector *sel = &pol->selector;
                int match;

                if (pol->family != family)
                        continue;

                match = xfrm_selector_match(sel, fl, family);
                if (match) {
                        xfrm_pol_hold(pol);
                        break;
                }
        }
        read_unlock_bh(&xfrm_policy_lock);
        if ((*objp = (void *) pol) != NULL)
                *obj_refp = &pol->refcnt;
}

struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
{
        struct xfrm_policy *pol;

        read_lock_bh(&xfrm_policy_lock);
        if ((pol = sk->sk_policy[dir]) != NULL) {
                int match = xfrm_selector_match(&pol->selector, fl,
                                                sk->sk_family);
                if (match)
                        xfrm_pol_hold(pol);
                else
                        pol = NULL;
        }
        read_unlock_bh(&xfrm_policy_lock);
        return pol;
}

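/* Link and unlink helpers for the per-direction policy lists; callers
 * must hold xfrm_policy_lock for writing.
 */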
static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
        pol->next = xfrm_policy_list[dir];
        xfrm_policy_list[dir] = pol;
        xfrm_pol_hold(pol);
}

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
                                                int dir)
{
        struct xfrm_policy **polp;

        for (polp = &xfrm_policy_list[dir];
             *polp != NULL; polp = &(*polp)->next) {
                if (*polp == pol) {
                        *polp = pol->next;
                        return pol;
                }
        }
        return NULL;
}

void xfrm_policy_delete(struct xfrm_policy *pol, int dir)
{
        write_lock_bh(&xfrm_policy_lock);
        pol = __xfrm_policy_unlink(pol, dir);
        write_unlock_bh(&xfrm_policy_lock);
        if (pol) {
                if (dir < XFRM_POLICY_MAX)
                        atomic_inc(&flow_cache_genid);
                xfrm_policy_kill(pol);
        }
}

int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
        struct xfrm_policy *old_pol;

        write_lock_bh(&xfrm_policy_lock);
        old_pol = sk->sk_policy[dir];
        sk->sk_policy[dir] = pol;
        if (pol) {
                pol->curlft.add_time = (unsigned long)xtime.tv_sec;
                pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
                __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
        }
        if (old_pol)
                __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
        write_unlock_bh(&xfrm_policy_lock);

        if (old_pol) {
                xfrm_policy_kill(old_pol);
        }
        return 0;
}

static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
{
        struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);

        if (newp) {
                newp->selector = old->selector;
                newp->lft = old->lft;
                newp->curlft = old->curlft;
                newp->action = old->action;
                newp->flags = old->flags;
                newp->xfrm_nr = old->xfrm_nr;
                newp->index = old->index;
                memcpy(newp->xfrm_vec, old->xfrm_vec,
                       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
                write_lock_bh(&xfrm_policy_lock);
                __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
                write_unlock_bh(&xfrm_policy_lock);
                xfrm_pol_put(newp);
        }
        return newp;
}

int __xfrm_sk_clone_policy(struct sock *sk)
{
        struct xfrm_policy *p0 = sk->sk_policy[0],
                           *p1 = sk->sk_policy[1];

        sk->sk_policy[0] = sk->sk_policy[1] = NULL;
        if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
                return -ENOMEM;
        if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
                return -ENOMEM;
        return 0;
}

/* Resolve list of templates for the flow, given policy. */

static int
xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
                  struct xfrm_state **xfrm,
                  unsigned short family)
{
        int nx;
        int i, error;
        xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
        xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);

        for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
                struct xfrm_state *x;
                xfrm_address_t *remote = daddr;
                xfrm_address_t *local  = saddr;
                struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

                if (tmpl->mode) {
                        remote = &tmpl->id.daddr;
                        local = &tmpl->saddr;
                }

                x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

                if (x && x->km.state == XFRM_STATE_VALID) {
                        xfrm[nx++] = x;
                        daddr = remote;
                        saddr = local;
                        continue;
                }
                if (x) {
                        error = (x->km.state == XFRM_STATE_ERROR ?
                                 -EINVAL : -EAGAIN);
                        xfrm_state_put(x);
                }

                if (!tmpl->optional)
                        goto fail;
        }
        return nx;

fail:
        for (nx--; nx>=0; nx--)
                xfrm_state_put(xfrm[nx]);
        return error;
}

/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

static struct dst_entry *
xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
{
        struct dst_entry *x;
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
        if (unlikely(afinfo == NULL))
                return ERR_PTR(-EINVAL);
        x = afinfo->find_bundle(fl, policy);
        xfrm_policy_put_afinfo(afinfo);
        return x;
}

/* Allocate chain of dst_entry's, attach known xfrm's, calculate
 * all the metrics... In short, bundle a bundle.
 */

static int
xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
                   struct flowi *fl, struct dst_entry **dst_p,
                   unsigned short family)
{
        int err;
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
        if (unlikely(afinfo == NULL))
                return -EINVAL;
        err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p);
        xfrm_policy_put_afinfo(afinfo);
        return err;
}

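/* Map XFRM_POLICY_{IN,OUT,FWD} onto the corresponding FLOW_DIR_*
 * values; a no-op when the two sets of constants already coincide.
 */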
static inline int policy_to_flow_dir(int dir)
{
        if (XFRM_POLICY_IN == FLOW_DIR_IN &&
            XFRM_POLICY_OUT == FLOW_DIR_OUT &&
            XFRM_POLICY_FWD == FLOW_DIR_FWD)
                return dir;
        switch (dir) {
        default:
        case XFRM_POLICY_IN:
                return FLOW_DIR_IN;
        case XFRM_POLICY_OUT:
                return FLOW_DIR_OUT;
        case XFRM_POLICY_FWD:
                return FLOW_DIR_FWD;
        }
}

static int stale_bundle(struct dst_entry *dst);

/* Main function: finds/creates a bundle for given flow.
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
 * on interfaces with disabled IPsec.
 */
int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
                struct sock *sk, int flags)
{
        struct xfrm_policy *policy;
        struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
        struct dst_entry *dst, *dst_orig = *dst_p;
        int nx = 0;
        int err;
        u32 genid;
        u16 family = dst_orig->ops->family;
restart:
        genid = atomic_read(&flow_cache_genid);
        policy = NULL;
        if (sk && sk->sk_policy[1])
                policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);

        if (!policy) {
                /* To accelerate a bit...  */
                if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
                        return 0;

                policy = flow_cache_lookup(fl, family,
                                           policy_to_flow_dir(XFRM_POLICY_OUT),
                                           xfrm_policy_lookup);
        }

        if (!policy)
                return 0;

        policy->curlft.use_time = (unsigned long)xtime.tv_sec;

        switch (policy->action) {
        case XFRM_POLICY_BLOCK:
                /* Prohibit the flow */
                xfrm_pol_put(policy);
                return -EPERM;

        case XFRM_POLICY_ALLOW:
                if (policy->xfrm_nr == 0) {
                        /* Flow passes not transformed. */
                        xfrm_pol_put(policy);
                        return 0;
                }

                /* Try to find matching bundle.
                 *
                 * LATER: help from flow cache. It is optional, this
                 * is required only for output policy.
                 */
                dst = xfrm_find_bundle(fl, policy, family);
                if (IS_ERR(dst)) {
                        xfrm_pol_put(policy);
                        return PTR_ERR(dst);
                }

                if (dst)
                        break;

                nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);

                if (unlikely(nx<0)) {
                        err = nx;
                        if (err == -EAGAIN && flags) {
                                DECLARE_WAITQUEUE(wait, current);

                                add_wait_queue(&km_waitq, &wait);
                                set_current_state(TASK_INTERRUPTIBLE);
                                schedule();
                                set_current_state(TASK_RUNNING);
                                remove_wait_queue(&km_waitq, &wait);

                                nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);

                                if (nx == -EAGAIN && signal_pending(current)) {
                                        err = -ERESTART;
                                        goto error;
                                }
                                if (nx == -EAGAIN ||
                                    genid != atomic_read(&flow_cache_genid)) {
                                        xfrm_pol_put(policy);
                                        goto restart;
                                }
                                err = nx;
                        }
                        if (err < 0)
                                goto error;
                }
                if (nx == 0) {
                        /* Flow passes not transformed. */
                        xfrm_pol_put(policy);
                        return 0;
                }

                dst = dst_orig;
                err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);

                if (unlikely(err)) {
                        int i;
                        for (i=0; i<nx; i++)
                                xfrm_state_put(xfrm[i]);
                        goto error;
                }

                write_lock_bh(&policy->lock);
                if (unlikely(policy->dead || stale_bundle(dst))) {
                        /* Wow! While we worked on resolving, this
                         * policy has gone. Retry. It is not paranoia,
                         * we just cannot enlist new bundle to dead object.
                         * We can't enlist stale bundles either.
                         */
                        write_unlock_bh(&policy->lock);

                        xfrm_pol_put(policy);
                        if (dst)
                                dst_free(dst);
                        goto restart;
                }
                dst->next = policy->bundles;
                policy->bundles = dst;
                dst_hold(dst);
                write_unlock_bh(&policy->lock);
        }
        *dst_p = dst;
        dst_release(dst_orig);
        xfrm_pol_put(policy);
        return 0;

error:
        dst_release(dst_orig);
        xfrm_pol_put(policy);
        *dst_p = NULL;
        return err;
}

/* When skb is transformed back to its "native" form, we have to
 * check policy restrictions. At the moment we do this in a maximally
 * stupid way. Shame on me. :-) Of course, connected sockets must
 * have policy cached at them.
 */

static inline int
xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
              unsigned short family)
{
        if (xfrm_state_kern(x))
                return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, family);
        return  x->id.proto == tmpl->id.proto &&
                (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
                (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
                x->props.mode == tmpl->mode &&
                (tmpl->aalgos & (1<<x->props.aalgo)) &&
                !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family));
}

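/* Match @tmpl against the decapsulated states in @sp starting at @start.
 * Returns the index just past the matching state, @start unchanged when
 * an optional template can be skipped, or -1 when a required template
 * was not satisfied.
 */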
static inline int
xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
               unsigned short family)
{
        int idx = start;

        if (tmpl->optional) {
                if (!tmpl->mode)
                        return start;
        } else
                start = -1;
        for (; idx < sp->len; idx++) {
                if (xfrm_state_ok(tmpl, sp->x[idx].xvec, family))
                        return ++idx;
                if (sp->x[idx].xvec->props.mode)
                        break;
        }
        return start;
}

static int
_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
{
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);

        if (unlikely(afinfo == NULL))
                return -EAFNOSUPPORT;

        afinfo->decode_session(skb, fl);
        xfrm_policy_put_afinfo(afinfo);
        return 0;
}

static inline int secpath_has_tunnel(struct sec_path *sp, int k)
{
        for (; k < sp->len; k++) {
                if (sp->x[k].xvec->props.mode)
                        return 1;
        }

        return 0;
}

int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
                        unsigned short family)
{
        struct xfrm_policy *pol;
        struct flowi fl;

        if (_decode_session(skb, &fl, family) < 0)
                return 0;

        /* First, check the used SAs against their selectors. */
        if (skb->sp) {
                int i;

                for (i=skb->sp->len-1; i>=0; i--) {
                        struct sec_decap_state *xvec = &(skb->sp->x[i]);
                        if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family))
                                return 0;

                        /* If there is a post_input processor, try running it */
                        if (xvec->xvec->type->post_input &&
                            (xvec->xvec->type->post_input)(xvec->xvec,
                                                           &(xvec->decap),
                                                           skb) != 0)
                                return 0;
                }
        }

        pol = NULL;
        if (sk && sk->sk_policy[dir])
                pol = xfrm_sk_policy_lookup(sk, dir, &fl);

        if (!pol)
                pol = flow_cache_lookup(&fl, family,
                                        policy_to_flow_dir(dir),
                                        xfrm_policy_lookup);

        if (!pol)
                return !skb->sp || !secpath_has_tunnel(skb->sp, 0);

        pol->curlft.use_time = (unsigned long)xtime.tv_sec;

        if (pol->action == XFRM_POLICY_ALLOW) {
                struct sec_path *sp;
                static struct sec_path dummy;
                int i, k;

                if ((sp = skb->sp) == NULL)
                        sp = &dummy;

                /* For each tunnel xfrm, find the first matching tmpl.
                 * For each tmpl before that, find corresponding xfrm.
                 * Order is _important_. Later we will implement
                 * some barriers, but at the moment barriers
                 * are implied between every two transformations.
                 */
                for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) {
                        k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family);
                        if (k < 0)
                                goto reject;
                }

                if (secpath_has_tunnel(sp, k))
                        goto reject;

                xfrm_pol_put(pol);
                return 1;
        }

reject:
        xfrm_pol_put(pol);
        return 0;
}

int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
        struct flowi fl;

        if (_decode_session(skb, &fl, family) < 0)
                return 0;

        return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
}

/* Optimize later using cookies and generation ids. */

static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
        if (!stale_bundle(dst))
                return dst;

        dst_release(dst);
        return NULL;
}

static int stale_bundle(struct dst_entry *dst)
{
        struct dst_entry *child = dst;

        while (child) {
                if (child->obsolete > 0 ||
                    (child->dev && !netif_running(child->dev)) ||
                    (child->xfrm && child->xfrm->km.state != XFRM_STATE_VALID)) {
                        return 1;
                }
                child = child->child;
        }

        return 0;
}

static void xfrm_dst_destroy(struct dst_entry *dst)
{
        if (!dst->xfrm)
                return;
        xfrm_state_put(dst->xfrm);
        dst->xfrm = NULL;
}

static void xfrm_link_failure(struct sk_buff *skb)
{
        /* Impossible. Such dst must be popped before it reaches the
         * point of failure. */
        return;
}

static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
{
        if (dst) {
                if (dst->obsolete) {
                        dst_release(dst);
                        dst = NULL;
                }
        }
        return dst;
}

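/* Walk every policy's bundle list and move the bundles selected by
 * @func onto a private list, then free them outside the locks.
 */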
static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
{
        int i;
        struct xfrm_policy *pol;
        struct dst_entry *dst, **dstp, *gc_list = NULL;

        read_lock_bh(&xfrm_policy_lock);
        for (i=0; i<2*XFRM_POLICY_MAX; i++) {
                for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
                        write_lock(&pol->lock);
                        dstp = &pol->bundles;
                        while ((dst=*dstp) != NULL) {
                                if (func(dst)) {
                                        *dstp = dst->next;
                                        dst->next = gc_list;
                                        gc_list = dst;
                                } else {
                                        dstp = &dst->next;
                                }
                        }
                        write_unlock(&pol->lock);
                }
        }
        read_unlock_bh(&xfrm_policy_lock);

        while (gc_list) {
                dst = gc_list;
                gc_list = dst->next;
                dst_free(dst);
        }
}

static int unused_bundle(struct dst_entry *dst)
{
        return !atomic_read(&dst->__refcnt);
}

static void __xfrm_garbage_collect(void)
{
        xfrm_prune_bundles(unused_bundle);
}

int xfrm_flush_bundles(void)
{
        xfrm_prune_bundles(stale_bundle);
        return 0;
}

/* Well... that's _TASK_. We need to scan through the transformation
 * list and figure out what mss tcp should generate in order for the
 * final datagram to fit the mtu. Mama mia... :-)
 *
 * Apparently, some easy way exists, but we used to choose the most
 * bizarre ones. :-) So, raising Kalashnikov... tra-ta-ta.
 *
 * Consider this function as something like dark humour. :-)
 */
static int xfrm_get_mss(struct dst_entry *dst, u32 mtu)
{
        int res = mtu - dst->header_len;

        for (;;) {
                struct dst_entry *d = dst;
                int m = res;

                do {
                        struct xfrm_state *x = d->xfrm;
                        if (x) {
                                spin_lock_bh(&x->lock);
                                if (x->km.state == XFRM_STATE_VALID &&
                                    x->type && x->type->get_max_size)
                                        m = x->type->get_max_size(d->xfrm, m);
                                else
                                        m += x->props.header_len;
                                spin_unlock_bh(&x->lock);
                        }
                } while ((d = d->child) != NULL);

                if (m <= mtu)
                        break;
                res -= (m - mtu);
                if (res < 88)
                        return mtu;
        }

        return res + dst->header_len;
}

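/* Register per-family policy ops, filling in any dst_ops hooks the
 * family left NULL with the generic xfrm implementations above.
 */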
int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
        int err = 0;
        if (unlikely(afinfo == NULL))
                return -EINVAL;
        if (unlikely(afinfo->family >= NPROTO))
                return -EAFNOSUPPORT;
        write_lock(&xfrm_policy_afinfo_lock);
        if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
                err = -ENOBUFS;
        else {
                struct dst_ops *dst_ops = afinfo->dst_ops;
                if (likely(dst_ops->kmem_cachep == NULL))
                        dst_ops->kmem_cachep = xfrm_dst_cache;
                if (likely(dst_ops->check == NULL))
                        dst_ops->check = xfrm_dst_check;
                if (likely(dst_ops->destroy == NULL))
                        dst_ops->destroy = xfrm_dst_destroy;
                if (likely(dst_ops->negative_advice == NULL))
                        dst_ops->negative_advice = xfrm_negative_advice;
                if (likely(dst_ops->link_failure == NULL))
                        dst_ops->link_failure = xfrm_link_failure;
                if (likely(dst_ops->get_mss == NULL))
                        dst_ops->get_mss = xfrm_get_mss;
                if (likely(afinfo->garbage_collect == NULL))
                        afinfo->garbage_collect = __xfrm_garbage_collect;
                xfrm_policy_afinfo[afinfo->family] = afinfo;
        }
        write_unlock(&xfrm_policy_afinfo_lock);
        return err;
}

int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
{
        int err = 0;
        if (unlikely(afinfo == NULL))
                return -EINVAL;
        if (unlikely(afinfo->family >= NPROTO))
                return -EAFNOSUPPORT;
        write_lock(&xfrm_policy_afinfo_lock);
        if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
                if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
                        err = -EINVAL;
                else {
                        struct dst_ops *dst_ops = afinfo->dst_ops;
                        xfrm_policy_afinfo[afinfo->family] = NULL;
                        dst_ops->kmem_cachep = NULL;
                        dst_ops->check = NULL;
                        dst_ops->destroy = NULL;
                        dst_ops->negative_advice = NULL;
                        dst_ops->link_failure = NULL;
                        dst_ops->get_mss = NULL;
                        afinfo->garbage_collect = NULL;
                }
        }
        write_unlock(&xfrm_policy_afinfo_lock);
        return err;
}

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
        struct xfrm_policy_afinfo *afinfo;
        if (unlikely(family >= NPROTO))
                return NULL;
        read_lock(&xfrm_policy_afinfo_lock);
        afinfo = xfrm_policy_afinfo[family];
        if (likely(afinfo != NULL))
                read_lock(&afinfo->lock);
        read_unlock(&xfrm_policy_afinfo_lock);
        return afinfo;
}

static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
        if (unlikely(afinfo == NULL))
                return;
        read_unlock(&afinfo->lock);
}

static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
        switch (event) {
        case NETDEV_DOWN:
                xfrm_flush_bundles();
        }
        return NOTIFY_DONE;
}

struct notifier_block xfrm_dev_notifier = {
        .notifier_call = xfrm_dev_event,
};

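/* Set up the policy layer: allocate the bundle cache, initialize the
 * policy GC work and register for netdevice events so bundles that
 * reference a downed device get flushed.
 */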
void __init xfrm_policy_init(void)
{
        xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
                                           sizeof(struct xfrm_dst),
                                           0, SLAB_HWCACHE_ALIGN,
                                           NULL, NULL);
        if (!xfrm_dst_cache)
                panic("XFRM: failed to allocate xfrm_dst_cache\n");

        INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
        register_netdevice_notifier(&xfrm_dev_notifier);
}

void __init xfrm_init(void)
{
        xfrm_state_init();
        xfrm_policy_init();
        xfrm_input_init();
}