patch-2_6_7-vs1_9_1_12
[linux-2.6.git] / net / xfrm / xfrm_policy.c
1 /* 
2  * xfrm_policy.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      Kazunori MIYAZAWA @USAGI
10  *      YOSHIFUJI Hideaki
11  *              Split up af-specific portion
12  *      Derek Atkins <derek@ihtfp.com>          Add the post_input processor
13  *      
14  */
15
16 #include <linux/config.h>
17 #include <linux/slab.h>
18 #include <linux/kmod.h>
19 #include <linux/list.h>
20 #include <linux/spinlock.h>
21 #include <linux/workqueue.h>
22 #include <linux/notifier.h>
23 #include <linux/netdevice.h>
24 #include <net/xfrm.h>
25 #include <net/ip.h>
26
/* Serializes configuration changes arriving from user space. */
DECLARE_MUTEX(xfrm_cfg_sem);

/* Protects xfrm_policy_list[] and per-policy list linkage. */
static rwlock_t xfrm_policy_lock = RW_LOCK_UNLOCKED;

/* One list head per direction.  The upper XFRM_POLICY_MAX slots hold
 * per-socket policies (linked at XFRM_POLICY_MAX + dir, see
 * xfrm_sk_policy_insert() and clone_policy()). */
struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];

/* Per-address-family policy operations, indexed by family. */
static rwlock_t xfrm_policy_afinfo_lock = RW_LOCK_UNLOCKED;
static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];

/* Slab cache for xfrm dst (bundle) entries. */
kmem_cache_t *xfrm_dst_cache;

/* Deferred destruction: xfrm_policy_kill() queues dead policies on
 * xfrm_policy_gc_list and schedules xfrm_policy_gc_work to reap them. */
static struct work_struct xfrm_policy_gc_work;
static struct list_head xfrm_policy_gc_list =
	LIST_HEAD_INIT(xfrm_policy_gc_list);
static spinlock_t xfrm_policy_gc_lock = SPIN_LOCK_UNLOCKED;
42
43 int xfrm_register_type(struct xfrm_type *type, unsigned short family)
44 {
45         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
46         struct xfrm_type_map *typemap;
47         int err = 0;
48
49         if (unlikely(afinfo == NULL))
50                 return -EAFNOSUPPORT;
51         typemap = afinfo->type_map;
52
53         write_lock(&typemap->lock);
54         if (likely(typemap->map[type->proto] == NULL))
55                 typemap->map[type->proto] = type;
56         else
57                 err = -EEXIST;
58         write_unlock(&typemap->lock);
59         xfrm_policy_put_afinfo(afinfo);
60         return err;
61 }
62
63 int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
64 {
65         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
66         struct xfrm_type_map *typemap;
67         int err = 0;
68
69         if (unlikely(afinfo == NULL))
70                 return -EAFNOSUPPORT;
71         typemap = afinfo->type_map;
72
73         write_lock(&typemap->lock);
74         if (unlikely(typemap->map[type->proto] != type))
75                 err = -ENOENT;
76         else
77                 typemap->map[type->proto] = NULL;
78         write_unlock(&typemap->lock);
79         xfrm_policy_put_afinfo(afinfo);
80         return err;
81 }
82
/* Look up the transform type registered for @proto in @family, taking a
 * reference on its owning module.  If none is registered, try once to
 * load a module named "xfrm-type-<family>-<proto>" and retry.
 * Returns NULL if the family is unsupported or no type can be found;
 * a non-NULL result must be released with xfrm_put_type(). */
struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo;
	struct xfrm_type_map *typemap;
	struct xfrm_type *type;
	int modload_attempted = 0;

retry:
	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return NULL;
	typemap = afinfo->type_map;

	read_lock(&typemap->lock);
	type = typemap->map[proto];
	/* Pin the module so the type cannot be unloaded while in use. */
	if (unlikely(type && !try_module_get(type->owner)))
		type = NULL;
	read_unlock(&typemap->lock);
	if (!type && !modload_attempted) {
		xfrm_policy_put_afinfo(afinfo);
		request_module("xfrm-type-%d-%d",
			       (int) family, (int) proto);
		modload_attempted = 1;
		goto retry;
	}

	xfrm_policy_put_afinfo(afinfo);
	return type;
}
112
113 int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, 
114                     unsigned short family)
115 {
116         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
117         int err = 0;
118
119         if (unlikely(afinfo == NULL))
120                 return -EAFNOSUPPORT;
121
122         if (likely(afinfo->dst_lookup != NULL))
123                 err = afinfo->dst_lookup(dst, fl);
124         else
125                 err = -EINVAL;
126         xfrm_policy_put_afinfo(afinfo);
127         return err;
128 }
129
/* Release the module reference taken by xfrm_get_type(). */
void xfrm_put_type(struct xfrm_type *type)
{
	module_put(type->owner);
}
134
135 static inline unsigned long make_jiffies(long secs)
136 {
137         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
138                 return MAX_SCHEDULE_TIMEOUT-1;
139         else
140                 return secs*HZ;
141 }
142
143 static void xfrm_policy_timer(unsigned long data)
144 {
145         struct xfrm_policy *xp = (struct xfrm_policy*)data;
146         unsigned long now = (unsigned long)xtime.tv_sec;
147         long next = LONG_MAX;
148         int warn = 0;
149         int dir;
150
151         read_lock(&xp->lock);
152
153         if (xp->dead)
154                 goto out;
155
156         dir = xp->index & 7;
157
158         if (xp->lft.hard_add_expires_seconds) {
159                 long tmo = xp->lft.hard_add_expires_seconds +
160                         xp->curlft.add_time - now;
161                 if (tmo <= 0)
162                         goto expired;
163                 if (tmo < next)
164                         next = tmo;
165         }
166         if (xp->lft.hard_use_expires_seconds) {
167                 long tmo = xp->lft.hard_use_expires_seconds +
168                         (xp->curlft.use_time ? : xp->curlft.add_time) - now;
169                 if (tmo <= 0)
170                         goto expired;
171                 if (tmo < next)
172                         next = tmo;
173         }
174         if (xp->lft.soft_add_expires_seconds) {
175                 long tmo = xp->lft.soft_add_expires_seconds +
176                         xp->curlft.add_time - now;
177                 if (tmo <= 0) {
178                         warn = 1;
179                         tmo = XFRM_KM_TIMEOUT;
180                 }
181                 if (tmo < next)
182                         next = tmo;
183         }
184         if (xp->lft.soft_use_expires_seconds) {
185                 long tmo = xp->lft.soft_use_expires_seconds +
186                         (xp->curlft.use_time ? : xp->curlft.add_time) - now;
187                 if (tmo <= 0) {
188                         warn = 1;
189                         tmo = XFRM_KM_TIMEOUT;
190                 }
191                 if (tmo < next)
192                         next = tmo;
193         }
194
195         if (warn)
196                 km_policy_expired(xp, dir, 0);
197         if (next != LONG_MAX &&
198             !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
199                 xfrm_pol_hold(xp);
200
201 out:
202         read_unlock(&xp->lock);
203         xfrm_pol_put(xp);
204         return;
205
206 expired:
207         km_policy_expired(xp, dir, 1);
208         xfrm_policy_delete(xp, dir);
209         xfrm_pol_put(xp);
210 }
211
212
/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
 * SPD calls.
 */

struct xfrm_policy *xfrm_policy_alloc(int gfp)
{
	struct xfrm_policy *policy;

	policy = kmalloc(sizeof(struct xfrm_policy), gfp);

	if (policy) {
		/* Start zeroed with one reference owned by the caller;
		 * the expiry timer is initialized but not yet armed. */
		memset(policy, 0, sizeof(struct xfrm_policy));
		atomic_set(&policy->refcnt, 1);
		policy->lock = RW_LOCK_UNLOCKED;
		init_timer(&policy->timer);
		policy->timer.data = (unsigned long)policy;
		policy->timer.function = xfrm_policy_timer;
	}
	return policy;
}
233
/* Destroy xfrm_policy: descendant resources must be released to this moment. */

void __xfrm_policy_destroy(struct xfrm_policy *policy)
{
	/* Final teardown: the policy must already be marked dead, own no
	 * cached bundles, and have no timer pending (a pending timer
	 * would still hold a reference on us). */
	if (!policy->dead)
		BUG();

	if (policy->bundles)
		BUG();

	if (del_timer(&policy->timer))
		BUG();

	kfree(policy);
}
249
/* Reap one dead policy: free its cached bundles, cancel its timer, and
 * drop the list reference.  Runs from the GC work queue. */
static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
{
	struct dst_entry *dst;

	while ((dst = policy->bundles) != NULL) {
		policy->bundles = dst->next;
		dst_free(dst);
	}

	/* A pending timer holds a reference; cancelling it drops that ref. */
	if (del_timer(&policy->timer))
		atomic_dec(&policy->refcnt);

	/* Extra references presumably live in the flow cache; flush it so
	 * they get released before we drop ours. */
	if (atomic_read(&policy->refcnt) > 1)
		flow_cache_flush();

	xfrm_pol_put(policy);
}
267
/* Work handler: reap all policies queued by xfrm_policy_kill(). */
static void xfrm_policy_gc_task(void *data)
{
	struct xfrm_policy *policy;
	struct list_head *entry, *tmp;
	struct list_head gc_list = LIST_HEAD_INIT(gc_list);

	/* Splice the whole pending list onto a private one so the lock is
	 * held only briefly. */
	spin_lock_bh(&xfrm_policy_gc_lock);
	list_splice_init(&xfrm_policy_gc_list, &gc_list);
	spin_unlock_bh(&xfrm_policy_gc_lock);

	list_for_each_safe(entry, tmp, &gc_list) {
		policy = list_entry(entry, struct xfrm_policy, list);
		xfrm_policy_gc_kill(policy);
	}
}
283
/* Rule must be locked. Release descendant resources, announce
 * entry dead. The rule must be unlinked from lists to the moment.
 */

void xfrm_policy_kill(struct xfrm_policy *policy)
{
	write_lock_bh(&policy->lock);
	if (policy->dead)
		goto out;	/* already queued by an earlier kill */

	policy->dead = 1;

	/* Defer the actual teardown to process context via the GC work
	 * queue (see xfrm_policy_gc_task()). */
	spin_lock(&xfrm_policy_gc_lock);
	list_add(&policy->list, &xfrm_policy_gc_list);
	spin_unlock(&xfrm_policy_gc_lock);
	schedule_work(&xfrm_policy_gc_work);

out:
	write_unlock_bh(&policy->lock);
}
304
/* Generate new index... KAME seems to generate them ordered by cost
 * of an absolute unpredictability of ordering of rules. This will not pass. */
static u32 xfrm_gen_index(int dir)
{
	u32 idx;
	struct xfrm_policy *p;
	static u32 idx_generator;

	/* Indices encode the direction in their low three bits (consumers
	 * recover it with "index & 7"), so step by 8.  Scan the list to
	 * skip values already in use; caller holds xfrm_policy_lock. */
	for (;;) {
		idx = (idx_generator | dir);
		idx_generator += 8;
		if (idx == 0)
			idx = 8;	/* index 0 is reserved/invalid */
		for (p = xfrm_policy_list[dir]; p; p = p->next) {
			if (p->index == idx)
				break;
		}
		if (!p)
			return idx;
	}
}
326
/* Insert @policy into the per-direction list, kept sorted by ascending
 * priority.  An existing entry with an identical selector is replaced
 * (its index is inherited), unless @excl is set, in which case the
 * insert fails with -EEXIST. */
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
	struct xfrm_policy *pol, **p;
	struct xfrm_policy *delpol = NULL;	/* entry being replaced */
	struct xfrm_policy **newpos = NULL;	/* insertion point by priority */

	write_lock_bh(&xfrm_policy_lock);
	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
		if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) {
			if (excl) {
				write_unlock_bh(&xfrm_policy_lock);
				return -EEXIST;
			}
			/* Same selector: unlink the old entry now, kill it
			 * after the lock is dropped. */
			*p = pol->next;
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority)
			continue;
		if (!newpos)
			newpos = p;
		if (delpol)
			break;
	}
	if (newpos)
		p = newpos;
	xfrm_pol_hold(policy);
	policy->next = *p;
	*p = policy;
	atomic_inc(&flow_cache_genid);	/* invalidate cached lookups */
	/* Reuse the replaced entry's index so existing handles stay valid. */
	policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
	policy->curlft.add_time = (unsigned long)xtime.tv_sec;
	policy->curlft.use_time = 0;
	/* Arm the expiry timer; mod_timer() == 0 means it was not pending,
	 * so the timer now owns an extra reference. */
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
	write_unlock_bh(&xfrm_policy_lock);

	if (delpol) {
		xfrm_policy_kill(delpol);
	}
	return 0;
}
369
/* Find the policy in @dir whose selector equals @sel and return it with
 * a reference held (NULL if none).  With @delete set, the entry is also
 * unlinked and killed; the returned reference remains the caller's. */
struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel,
				      int delete)
{
	struct xfrm_policy *pol, **p;

	write_lock_bh(&xfrm_policy_lock);
	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
		if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) {
			xfrm_pol_hold(pol);
			if (delete)
				*p = pol->next;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (pol && delete) {
		atomic_inc(&flow_cache_genid);	/* cached lookups now stale */
		xfrm_policy_kill(pol);
	}
	return pol;
}
392
/* Find a policy by index and return it with a reference held (NULL if
 * none).  With @delete set, the entry is also unlinked and killed.
 * NOTE(review): @dir is unused — the direction is recovered from the low
 * bits of @id ("id & 7", mirroring xfrm_gen_index()); confirm callers
 * pass consistent values. */
struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
{
	struct xfrm_policy *pol, **p;

	write_lock_bh(&xfrm_policy_lock);
	for (p = &xfrm_policy_list[id & 7]; (pol=*p)!=NULL; p = &pol->next) {
		if (pol->index == id) {
			xfrm_pol_hold(pol);
			if (delete)
				*p = pol->next;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (pol && delete) {
		atomic_inc(&flow_cache_genid);	/* cached lookups now stale */
		xfrm_policy_kill(pol);
	}
	return pol;
}
414
/* Remove and kill every policy on the main (non-socket) per-direction
 * lists, then invalidate the flow cache. */
void xfrm_policy_flush(void)
{
	struct xfrm_policy *xp;
	int dir;

	write_lock_bh(&xfrm_policy_lock);
	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		while ((xp = xfrm_policy_list[dir]) != NULL) {
			xfrm_policy_list[dir] = xp->next;
			/* Drop the list lock around the kill:
			 * xfrm_policy_kill() takes other locks
			 * (the policy's own lock, the GC lock). */
			write_unlock_bh(&xfrm_policy_lock);

			xfrm_policy_kill(xp);

			write_lock_bh(&xfrm_policy_lock);
		}
	}
	atomic_inc(&flow_cache_genid);
	write_unlock_bh(&xfrm_policy_lock);
}
434
/* Invoke @func on every installed policy (both main and per-socket
 * lists), passing the direction, a decreasing remaining-count, and
 * @data.  Stops at the first nonzero return from @func and returns it;
 * returns -ENOENT when no policies exist at all. */
int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
		     void *data)
{
	struct xfrm_policy *xp;
	int dir;
	int count = 0;
	int error = 0;

	read_lock_bh(&xfrm_policy_lock);
	/* First pass: count entries so the callback can see how many
	 * remain (count is pre-decremented below). */
	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
		for (xp = xfrm_policy_list[dir]; xp; xp = xp->next)
			count++;
	}

	if (count == 0) {
		error = -ENOENT;
		goto out;
	}

	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
		for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) {
			error = func(xp, dir%XFRM_POLICY_MAX, --count, data);
			if (error)
				goto out;
		}
	}

out:
	read_unlock_bh(&xfrm_policy_lock);
	return error;
}
466
467
/* Find policy to apply to this flow. */

/* Flow-cache resolver: store in *objp the first policy in @dir whose
 * selector matches @fl (with a reference held), and expose its refcount
 * through *obj_refp so the flow cache can manage that reference. */
static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
			       void **objp, atomic_t **obj_refp)
{
	struct xfrm_policy *pol;

	read_lock_bh(&xfrm_policy_lock);
	for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) {
		struct xfrm_selector *sel = &pol->selector;
		int match;

		if (pol->family != family)
			continue;

		match = xfrm_selector_match(sel, fl, family);
		if (match) {
			xfrm_pol_hold(pol);
			break;
		}
	}
	read_unlock_bh(&xfrm_policy_lock);
	if ((*objp = (void *) pol) != NULL)
		*obj_refp = &pol->refcnt;
}
493
/* Return the socket's own policy for @dir if its selector matches @fl,
 * with a reference held; NULL otherwise. */
struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
{
	struct xfrm_policy *pol;

	read_lock_bh(&xfrm_policy_lock);
	if ((pol = sk->sk_policy[dir]) != NULL) {
		int match = xfrm_selector_match(&pol->selector, fl,
						sk->sk_family);
		if (match)
			xfrm_pol_hold(pol);
		else
			pol = NULL;
	}
	read_unlock_bh(&xfrm_policy_lock);
	return pol;
}
510
511 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
512 {
513         pol->next = xfrm_policy_list[dir];
514         xfrm_policy_list[dir] = pol;
515         xfrm_pol_hold(pol);
516 }
517
518 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
519                                                 int dir)
520 {
521         struct xfrm_policy **polp;
522
523         for (polp = &xfrm_policy_list[dir];
524              *polp != NULL; polp = &(*polp)->next) {
525                 if (*polp == pol) {
526                         *polp = pol->next;
527                         return pol;
528                 }
529         }
530         return NULL;
531 }
532
533 void xfrm_policy_delete(struct xfrm_policy *pol, int dir)
534 {
535         write_lock_bh(&xfrm_policy_lock);
536         pol = __xfrm_policy_unlink(pol, dir);
537         write_unlock_bh(&xfrm_policy_lock);
538         if (pol)
539                 xfrm_policy_kill(pol);
540 }
541
/* Attach @pol as the socket's policy for @dir (NULL clears it).  The
 * policy is also linked on the per-socket half of the global lists so
 * it is visible to xfrm_policy_walk(); any previous socket policy is
 * unlinked and killed.  Always returns 0. */
int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
	struct xfrm_policy *old_pol;

	write_lock_bh(&xfrm_policy_lock);
	old_pol = sk->sk_policy[dir];
	sk->sk_policy[dir] = pol;
	if (pol) {
		pol->curlft.add_time = (unsigned long)xtime.tv_sec;
		/* Socket policies live in the upper half of the
		 * index/list space: XFRM_POLICY_MAX + dir. */
		pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
	}
	if (old_pol)
		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
	write_unlock_bh(&xfrm_policy_lock);

	if (old_pol) {
		xfrm_policy_kill(old_pol);
	}
	return 0;
}
563
/* Duplicate a per-socket policy for a cloned socket (GFP_ATOMIC).
 * Copies the selector, lifetimes and template vector, links the copy on
 * the per-socket global list, then drops the allocation reference (the
 * list link taken above keeps the copy alive).  Returns NULL on
 * allocation failure. */
static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
{
	struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);

	if (newp) {
		newp->selector = old->selector;
		newp->lft = old->lft;
		newp->curlft = old->curlft;
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
		write_lock_bh(&xfrm_policy_lock);
		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
		write_unlock_bh(&xfrm_policy_lock);
		xfrm_pol_put(newp);
	}
	return newp;
}
585
/* Clone both per-socket policies onto a newly copied socket.  On
 * allocation failure -ENOMEM is returned and the socket keeps whatever
 * was cloned so far (possibly NULL slots). */
int __xfrm_sk_clone_policy(struct sock *sk)
{
	struct xfrm_policy *p0 = sk->sk_policy[0],
			   *p1 = sk->sk_policy[1];

	/* Clear first so the new socket never aliases the parent's
	 * policy pointers. */
	sk->sk_policy[0] = sk->sk_policy[1] = NULL;
	if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
		return -ENOMEM;
	if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
		return -ENOMEM;
	return 0;
}
598
/* Resolve list of templates for the flow, given policy. */

/* Walk the policy's template vector in order, acquiring a valid SA for
 * each template into @xfrm.  Returns the number of states stored, or a
 * negative errno (all states acquired so far are released on failure).
 * -EAGAIN means an SA exists but is still being negotiated. */
static int
xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
		  struct xfrm_state **xfrm,
		  unsigned short family)
{
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);

	for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
		struct xfrm_state *x;
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

		/* A tunnel-mode template carries its own endpoints; those
		 * then become the addresses used for the next template. */
		if (tmpl->mode) {
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			/* State exists but is unusable: -EAGAIN lets the
			 * caller wait for key negotiation to finish. */
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
		}

		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
	/* Release every state acquired before the failure. */
	for (nx--; nx>=0; nx--)
		xfrm_state_put(xfrm[nx]);
	return error;
}
646
647 /* Check that the bundle accepts the flow and its components are
648  * still valid.
649  */
650
651 static struct dst_entry *
652 xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
653 {
654         struct dst_entry *x;
655         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
656         if (unlikely(afinfo == NULL))
657                 return ERR_PTR(-EINVAL);
658         x = afinfo->find_bundle(fl, policy);
659         xfrm_policy_put_afinfo(afinfo);
660         return x;
661 }
662
663 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
664  * all the metrics... Shortly, bundle a bundle.
665  */
666
667 static int
668 xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
669                    struct flowi *fl, struct dst_entry **dst_p,
670                    unsigned short family)
671 {
672         int err;
673         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
674         if (unlikely(afinfo == NULL))
675                 return -EINVAL;
676         err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p);
677         xfrm_policy_put_afinfo(afinfo);
678         return err;
679 }
680
681 static inline int policy_to_flow_dir(int dir)
682 {
683         if (XFRM_POLICY_IN == FLOW_DIR_IN &&
684             XFRM_POLICY_OUT == FLOW_DIR_OUT &&
685             XFRM_POLICY_FWD == FLOW_DIR_FWD)
686                 return dir;
687         switch (dir) {
688         default:
689         case XFRM_POLICY_IN:
690                 return FLOW_DIR_IN;
691         case XFRM_POLICY_OUT:
692                 return FLOW_DIR_OUT;
693         case XFRM_POLICY_FWD:
694                 return FLOW_DIR_FWD;
695         };
696 }
697
698 static int stale_bundle(struct dst_entry *dst);
699
/* Main function: finds/creates a bundle for given flow.
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
 * on interfaces with disabled IPsec.
 */
int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
		struct sock *sk, int flags)
{
	struct xfrm_policy *policy;
	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
	struct rtable *rt = (struct rtable*)*dst_p;
	struct dst_entry *dst;
	int nx = 0;
	int err;
	u32 genid;	/* flow-cache generation sampled at (re)start */
	u16 family = (*dst_p)->ops->family;

	/* Fill in any flow addresses the caller left zero from the route. */
	switch (family) {
	case AF_INET:
		if (!fl->fl4_src)
			fl->fl4_src = rt->rt_src;
		if (!fl->fl4_dst)
			fl->fl4_dst = rt->rt_dst;
		/* fallthrough */
	case AF_INET6:
		/* Still not clear... */
	default:
		/* nothing */;
	}

restart:
	genid = atomic_read(&flow_cache_genid);
	policy = NULL;
	/* Per-socket output policy takes precedence over the global SPD. */
	if (sk && sk->sk_policy[1])
		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);

	if (!policy) {
		/* To accelerate a bit...  */
		if ((rt->u.dst.flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
			return 0;

		policy = flow_cache_lookup(fl, family,
					   policy_to_flow_dir(XFRM_POLICY_OUT),
					   xfrm_policy_lookup);
	}

	if (!policy)
		return 0;

	policy->curlft.use_time = (unsigned long)xtime.tv_sec;

	switch (policy->action) {
	case XFRM_POLICY_BLOCK:
		/* Prohibit the flow */
		xfrm_pol_put(policy);
		return -EPERM;

	case XFRM_POLICY_ALLOW:
		if (policy->xfrm_nr == 0) {
			/* Flow passes not transformed. */
			xfrm_pol_put(policy);
			return 0;
		}

		/* Try to find matching bundle.
		 *
		 * LATER: help from flow cache. It is optional, this
		 * is required only for output policy.
		 */
		dst = xfrm_find_bundle(fl, policy, family);
		if (IS_ERR(dst)) {
			xfrm_pol_put(policy);
			return PTR_ERR(dst);
		}

		if (dst)
			break;	/* cached bundle reused below */

		/* No cached bundle: resolve SAs for the policy templates. */
		nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);

		if (unlikely(nx<0)) {
			err = nx;
			if (err == -EAGAIN && flags) {
				/* SAs still being negotiated; if the caller
				 * allows blocking, sleep on the key-manager
				 * waitqueue and retry once. */
				DECLARE_WAITQUEUE(wait, current);

				add_wait_queue(&km_waitq, &wait);
				set_current_state(TASK_INTERRUPTIBLE);
				schedule();
				set_current_state(TASK_RUNNING);
				remove_wait_queue(&km_waitq, &wait);

				nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);

				if (nx == -EAGAIN && signal_pending(current)) {
					err = -ERESTART;
					goto error;
				}
				/* Retry from scratch if still unresolved or
				 * the policy database changed under us. */
				if (nx == -EAGAIN ||
				    genid != atomic_read(&flow_cache_genid)) {
					xfrm_pol_put(policy);
					goto restart;
				}
				err = nx;
			}
			if (err < 0)
				goto error;
		}
		if (nx == 0) {
			/* Flow passes not transformed. */
			xfrm_pol_put(policy);
			return 0;
		}

		dst = &rt->u.dst;
		err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);

		if (unlikely(err)) {
			/* Bundle creation consumed nothing: drop the SAs. */
			int i;
			for (i=0; i<nx; i++)
				xfrm_state_put(xfrm[i]);
			goto error;
		}

		write_lock_bh(&policy->lock);
		if (unlikely(policy->dead || stale_bundle(dst))) {
			/* Wow! While we worked on resolving, this
			 * policy has gone. Retry. It is not paranoia,
			 * we just cannot enlist new bundle to dead object.
			 * We can't enlist stale bundles either.
			 */
			write_unlock_bh(&policy->lock);

			xfrm_pol_put(policy);
			if (dst)
				dst_free(dst);
			goto restart;
		}
		/* Cache the new bundle on the policy for future lookups. */
		dst->next = policy->bundles;
		policy->bundles = dst;
		dst_hold(dst);
		write_unlock_bh(&policy->lock);
	}
	*dst_p = dst;
	ip_rt_put(rt);	/* the raw route is replaced by the bundle */
	xfrm_pol_put(policy);
	return 0;

error:
	ip_rt_put(rt);
	xfrm_pol_put(policy);
	*dst_p = NULL;
	return err;
}
852
853 /* When skb is transformed back to its "native" form, we have to
854  * check policy restrictions. At the moment we make this in maximally
855  * stupid way. Shame on me. :-) Of course, connected sockets must
856  * have policy cached at them.
857  */
858
/* Check whether SA @x satisfies template @tmpl. */
static inline int
xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, 
	      unsigned short family)
{
	/* Kernel-internal states may only satisfy optional templates,
	 * and only when the addresses match. */
	if (xfrm_state_kern(x))
		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, family);
	/* A zero SPI/reqid in the template acts as a wildcard; address
	 * comparison is only enforced for tunnel-mode states. */
	return	x->id.proto == tmpl->id.proto &&
		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
		x->props.mode == tmpl->mode &&
		(tmpl->aalgos & (1<<x->props.aalgo)) &&
		!(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family));
}
872
/* Starting at @start in the sec_path, find the entry satisfying @tmpl.
 * Returns the index just past the matching entry; if no entry matches,
 * returns @start for a skippable (optional transport-mode) template,
 * otherwise -1. */
static inline int
xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
	       unsigned short family)
{
	int idx = start;

	if (tmpl->optional) {
		/* An optional transport-mode template may simply be
		 * skipped without consuming a sec_path entry. */
		if (!tmpl->mode)
			return start;
	} else
		start = -1;	/* mandatory: no match means failure */
	for (; idx < sp->len; idx++) {
		if (xfrm_state_ok(tmpl, sp->x[idx].xvec, family))
			return ++idx;
		/* Cannot search past a tunnel-mode state. */
		if (sp->x[idx].xvec->props.mode)
			break;
	}
	return start;
}
892
893 static int
894 _decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
895 {
896         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
897
898         if (unlikely(afinfo == NULL))
899                 return -EAFNOSUPPORT;
900
901         afinfo->decode_session(skb, fl);
902         xfrm_policy_put_afinfo(afinfo);
903         return 0;
904 }
905
906 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, 
907                         unsigned short family)
908 {
909         struct xfrm_policy *pol;
910         struct flowi fl;
911
912         if (_decode_session(skb, &fl, family) < 0)
913                 return 0;
914
915         /* First, check used SA against their selectors. */
916         if (skb->sp) {
917                 int i;
918
919                 for (i=skb->sp->len-1; i>=0; i--) {
920                   struct sec_decap_state *xvec = &(skb->sp->x[i]);
921                         if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family))
922                                 return 0;
923
924                         /* If there is a post_input processor, try running it */
925                         if (xvec->xvec->type->post_input &&
926                             (xvec->xvec->type->post_input)(xvec->xvec,
927                                                            &(xvec->decap),
928                                                            skb) != 0)
929                                 return 0;
930                 }
931         }
932
933         pol = NULL;
934         if (sk && sk->sk_policy[dir])
935                 pol = xfrm_sk_policy_lookup(sk, dir, &fl);
936
937         if (!pol)
938                 pol = flow_cache_lookup(&fl, family,
939                                         policy_to_flow_dir(dir),
940                                         xfrm_policy_lookup);
941
942         if (!pol)
943                 return !skb->sp;
944
945         pol->curlft.use_time = (unsigned long)xtime.tv_sec;
946
947         if (pol->action == XFRM_POLICY_ALLOW) {
948                 struct sec_path *sp;
949                 static struct sec_path dummy;
950                 int i, k;
951
952                 if ((sp = skb->sp) == NULL)
953                         sp = &dummy;
954
955                 /* For each tunnel xfrm, find the first matching tmpl.
956                  * For each tmpl before that, find corresponding xfrm.
957                  * Order is _important_. Later we will implement
958                  * some barriers, but at the moment barriers
959                  * are implied between each two transformations.
960                  */
961                 for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) {
962                         k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family);
963                         if (k < 0)
964                                 goto reject;
965                 }
966
967                 for (; k < sp->len; k++) {
968                         if (sp->x[k].xvec->props.mode)
969                                 goto reject;
970                 }
971
972                 xfrm_pol_put(pol);
973                 return 1;
974         }
975
976 reject:
977         xfrm_pol_put(pol);
978         return 0;
979 }
980
981 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
982 {
983         struct flowi fl;
984
985         if (_decode_session(skb, &fl, family) < 0)
986                 return 0;
987
988         return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
989 }
990
991 /* Optimize later using cookies and generation ids. */
992
993 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
994 {
995         if (!stale_bundle(dst))
996                 return dst;
997
998         dst_release(dst);
999         return NULL;
1000 }
1001
1002 static int stale_bundle(struct dst_entry *dst)
1003 {
1004         struct dst_entry *child = dst;
1005
1006         while (child) {
1007                 if (child->obsolete > 0 ||
1008                     (child->dev && !netif_running(child->dev)) ||
1009                     (child->xfrm && child->xfrm->km.state != XFRM_STATE_VALID)) {
1010                         return 1;
1011                 }
1012                 child = child->child;
1013         }
1014
1015         return 0;
1016 }
1017
1018 static void xfrm_dst_destroy(struct dst_entry *dst)
1019 {
1020         if (!dst->xfrm)
1021                 return;
1022         xfrm_state_put(dst->xfrm);
1023         dst->xfrm = NULL;
1024 }
1025
/* dst_ops link_failure hook -- intentionally empty.  An xfrm dst is
 * expected to be popped off the packet before any point where link
 * failure can be signalled, so this should never have work to do. */
static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible. Such dst must be popped before reaches point of failure. */
	return;
}
1031
1032 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
1033 {
1034         if (dst) {
1035                 if (dst->obsolete) {
1036                         dst_release(dst);
1037                         dst = NULL;
1038                 }
1039         }
1040         return dst;
1041 }
1042
1043 static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
1044 {
1045         int i;
1046         struct xfrm_policy *pol;
1047         struct dst_entry *dst, **dstp, *gc_list = NULL;
1048
1049         read_lock_bh(&xfrm_policy_lock);
1050         for (i=0; i<2*XFRM_POLICY_MAX; i++) {
1051                 for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
1052                         write_lock(&pol->lock);
1053                         dstp = &pol->bundles;
1054                         while ((dst=*dstp) != NULL) {
1055                                 if (func(dst)) {
1056                                         *dstp = dst->next;
1057                                         dst->next = gc_list;
1058                                         gc_list = dst;
1059                                 } else {
1060                                         dstp = &dst->next;
1061                                 }
1062                         }
1063                         write_unlock(&pol->lock);
1064                 }
1065         }
1066         read_unlock_bh(&xfrm_policy_lock);
1067
1068         while (gc_list) {
1069                 dst = gc_list;
1070                 gc_list = dst->next;
1071                 dst_free(dst);
1072         }
1073 }
1074
1075 static int unused_bundle(struct dst_entry *dst)
1076 {
1077         return !atomic_read(&dst->__refcnt);
1078 }
1079
/* Garbage collector installed as afinfo->garbage_collect by
 * xfrm_policy_register_afinfo(): prunes only unreferenced bundles. */
static void __xfrm_garbage_collect(void)
{
	xfrm_prune_bundles(unused_bundle);
}
1084
/* Drop every stale bundle (used e.g. when a device goes down, see
 * xfrm_dev_event()).  Always returns 0. */
int xfrm_flush_bundles(void)
{
	xfrm_prune_bundles(stale_bundle);
	return 0;
}
1090
/* Well... that's _TASK_. We need to scan through transformation
 * list and figure out what mss tcp should generate in order to
 * final datagram fit to mtu. Mama mia... :-)
 *
 * Apparently, some easy way exists, but we used to choose the most
 * bizarre ones. :-) So, raising Kalashnikov... tra-ta-ta.
 *
 * Consider this function as something like dark humour. :-)
 */
static int xfrm_get_mss(struct dst_entry *dst, u32 mtu)
{
	/* Initial guess: the payload budget left after the bundle's
	 * cumulative header overhead. */
	int res = mtu - dst->header_len;

	for (;;) {
		struct dst_entry *d = dst;
		int m = res;

		/* Expand the candidate payload size through every
		 * transform in the bundle to obtain the resulting
		 * on-the-wire size. */
		do {
			struct xfrm_state *x = d->xfrm;
			if (x) {
				spin_lock_bh(&x->lock);
				/* Prefer the type's own size estimate;
				 * otherwise fall back to adding its
				 * fixed header length. */
				if (x->km.state == XFRM_STATE_VALID &&
				    x->type && x->type->get_max_size)
					m = x->type->get_max_size(d->xfrm, m);
				else
					m += x->props.header_len;
				spin_unlock_bh(&x->lock);
			}
		} while ((d = d->child) != NULL);

		if (m <= mtu)
			break;
		/* Too big: shrink the candidate by the overshoot and
		 * iterate until the expanded size fits. */
		res -= (m - mtu);
		/* NOTE(review): 88 appears to be a lower sanity bound on
		 * the payload; below it we give up and return the raw
		 * mtu -- confirm the constant's origin before changing. */
		if (res < 88)
			return mtu;
	}

	return res + dst->header_len;
}
1130
1131 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
1132 {
1133         int err = 0;
1134         if (unlikely(afinfo == NULL))
1135                 return -EINVAL;
1136         if (unlikely(afinfo->family >= NPROTO))
1137                 return -EAFNOSUPPORT;
1138         write_lock(&xfrm_policy_afinfo_lock);
1139         if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
1140                 err = -ENOBUFS;
1141         else {
1142                 struct dst_ops *dst_ops = afinfo->dst_ops;
1143                 if (likely(dst_ops->kmem_cachep == NULL))
1144                         dst_ops->kmem_cachep = xfrm_dst_cache;
1145                 if (likely(dst_ops->check == NULL))
1146                         dst_ops->check = xfrm_dst_check;
1147                 if (likely(dst_ops->destroy == NULL))
1148                         dst_ops->destroy = xfrm_dst_destroy;
1149                 if (likely(dst_ops->negative_advice == NULL))
1150                         dst_ops->negative_advice = xfrm_negative_advice;
1151                 if (likely(dst_ops->link_failure == NULL))
1152                         dst_ops->link_failure = xfrm_link_failure;
1153                 if (likely(dst_ops->get_mss == NULL))
1154                         dst_ops->get_mss = xfrm_get_mss;
1155                 if (likely(afinfo->garbage_collect == NULL))
1156                         afinfo->garbage_collect = __xfrm_garbage_collect;
1157                 xfrm_policy_afinfo[afinfo->family] = afinfo;
1158         }
1159         write_unlock(&xfrm_policy_afinfo_lock);
1160         return err;
1161 }
1162
1163 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
1164 {
1165         int err = 0;
1166         if (unlikely(afinfo == NULL))
1167                 return -EINVAL;
1168         if (unlikely(afinfo->family >= NPROTO))
1169                 return -EAFNOSUPPORT;
1170         write_lock(&xfrm_policy_afinfo_lock);
1171         if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
1172                 if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
1173                         err = -EINVAL;
1174                 else {
1175                         struct dst_ops *dst_ops = afinfo->dst_ops;
1176                         xfrm_policy_afinfo[afinfo->family] = NULL;
1177                         dst_ops->kmem_cachep = NULL;
1178                         dst_ops->check = NULL;
1179                         dst_ops->destroy = NULL;
1180                         dst_ops->negative_advice = NULL;
1181                         dst_ops->link_failure = NULL;
1182                         dst_ops->get_mss = NULL;
1183                         afinfo->garbage_collect = NULL;
1184                 }
1185         }
1186         write_unlock(&xfrm_policy_afinfo_lock);
1187         return err;
1188 }
1189
1190 struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
1191 {
1192         struct xfrm_policy_afinfo *afinfo;
1193         if (unlikely(family >= NPROTO))
1194                 return NULL;
1195         read_lock(&xfrm_policy_afinfo_lock);
1196         afinfo = xfrm_policy_afinfo[family];
1197         if (likely(afinfo != NULL))
1198                 read_lock(&afinfo->lock);
1199         read_unlock(&xfrm_policy_afinfo_lock);
1200         return afinfo;
1201 }
1202
1203 void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
1204 {
1205         if (unlikely(afinfo == NULL))
1206                 return;
1207         read_unlock(&afinfo->lock);
1208 }
1209
1210 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
1211 {
1212         switch (event) {
1213         case NETDEV_DOWN:
1214                 xfrm_flush_bundles();
1215         }
1216         return NOTIFY_DONE;
1217 }
1218
1219 struct notifier_block xfrm_dev_notifier = {
1220         xfrm_dev_event,
1221         NULL,
1222         0
1223 };
1224
1225 void __init xfrm_policy_init(void)
1226 {
1227         xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
1228                                            sizeof(struct xfrm_dst),
1229                                            0, SLAB_HWCACHE_ALIGN,
1230                                            NULL, NULL);
1231         if (!xfrm_dst_cache)
1232                 panic("XFRM: failed to allocate xfrm_dst_cache\n");
1233
1234         INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
1235         register_netdevice_notifier(&xfrm_dev_notifier);
1236 }
1237
/* Top-level xfrm bootstrap: bring up the state, policy and input
 * subsystems, in that order. */
void __init xfrm_init(void)
{
	xfrm_state_init();
	xfrm_policy_init();
	xfrm_input_init();
}
1244