fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/cache.h>
22 #include <asm/uaccess.h>
23 #include <linux/audit.h>
24
25 #include "xfrm_hash.h"
26
/* Netlink socket for xfrm notifications; presumably initialized by the
 * netlink/af_key setup code elsewhere — not visible in this file chunk. */
struct sock *xfrm_nl;
EXPORT_SYMBOL(xfrm_nl);

/* Sysctl-tunable default interval for async replay-window events (AE). */
u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);

/* Sysctl-tunable default sequence-number threshold for AE events. */
u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
35
/* Each xfrm_state may be linked to two tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by (daddr,family,reqid) to find what SAs exist for given
      destination/tunnel endpoint. (output)
 */

/* Protects the three hash tables below, xfrm_state_num and
 * xfrm_state_genid. Taken with spin_lock_bh() by process-context paths. */
static DEFINE_SPINLOCK(xfrm_state_lock);

/* Hash table to find appropriate SA towards given target (endpoint
 * of tunnel or destination of transport mode) allowed by selector.
 *
 * Main use is finding SA after policy selected tunnel or transport mode.
 * Also, it can be used by ah/esp icmp error handler to find offending SA.
 */
static struct hlist_head *xfrm_state_bydst __read_mostly;
static struct hlist_head *xfrm_state_bysrc __read_mostly;
static struct hlist_head *xfrm_state_byspi __read_mostly;
/* Current bucket-count mask (buckets - 1); all three tables share it. */
static unsigned int xfrm_state_hmask __read_mostly;
/* Upper bound on bucket count for the resize logic in xfrm_hash_grow_check(). */
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
/* Number of states currently linked into the tables. */
static unsigned int xfrm_state_num;
/* Generation counter bumped on insert; see __xfrm_state_bump_genids(). */
static unsigned int xfrm_state_genid;
58
59 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
60                                          xfrm_address_t *saddr,
61                                          u32 reqid,
62                                          unsigned short family)
63 {
64         return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
65 }
66
67 static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
68                                          xfrm_address_t *saddr,
69                                          unsigned short family)
70 {
71         return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
72 }
73
74 static inline unsigned int
75 xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
76 {
77         return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
78 }
79
/* Re-link every state on one old bydst chain into the three new tables.
 *
 * The walk iterates via the bydst link with the _safe variant, because
 * hlist_add_head(&x->bydst, ...) unlinks x from the chain being walked.
 * Re-adding bysrc/byspi does not disturb the bydst traversal.
 * Caller (xfrm_hash_resize) holds xfrm_state_lock.
 */
static void xfrm_hash_transfer(struct hlist_head *list,
			       struct hlist_head *ndsttable,
			       struct hlist_head *nsrctable,
			       struct hlist_head *nspitable,
			       unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp;
	struct xfrm_state *x;

	hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
		unsigned int h;

		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
				    x->props.reqid, x->props.family,
				    nhashmask);
		hlist_add_head(&x->bydst, ndsttable+h);

		h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
				    x->props.family,
				    nhashmask);
		hlist_add_head(&x->bysrc, nsrctable+h);

		/* Only states with an SPI assigned live in the byspi table. */
		if (x->id.spi) {
			h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
					    x->id.proto, x->props.family,
					    nhashmask);
			hlist_add_head(&x->byspi, nspitable+h);
		}
	}
}
110
111 static unsigned long xfrm_hash_new_size(void)
112 {
113         return ((xfrm_state_hmask + 1) << 1) *
114                 sizeof(struct hlist_head);
115 }
116
/* Serializes concurrent resize attempts scheduled via xfrm_hash_work. */
static DEFINE_MUTEX(hash_resize_mutex);

/* Worker: grow all three state hash tables to double their size.
 *
 * All three new tables are allocated up front, outside the spinlock;
 * on any allocation failure everything already allocated is freed and
 * the resize is abandoned.  The actual transfer and pointer/mask swap
 * happen in one xfrm_state_lock critical section, and the old tables
 * are freed only after the lock is dropped.
 */
static void xfrm_hash_resize(struct work_struct *__unused)
{
	struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
	unsigned long nsize, osize;
	unsigned int nhashmask, ohashmask;
	int i;

	mutex_lock(&hash_resize_mutex);

	nsize = xfrm_hash_new_size();
	ndst = xfrm_hash_alloc(nsize);
	if (!ndst)
		goto out_unlock;
	nsrc = xfrm_hash_alloc(nsize);
	if (!nsrc) {
		xfrm_hash_free(ndst, nsize);
		goto out_unlock;
	}
	nspi = xfrm_hash_alloc(nsize);
	if (!nspi) {
		xfrm_hash_free(ndst, nsize);
		xfrm_hash_free(nsrc, nsize);
		goto out_unlock;
	}

	spin_lock_bh(&xfrm_state_lock);

	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
	/* Walk old bydst buckets; transfer re-links all three chains. */
	for (i = xfrm_state_hmask; i >= 0; i--)
		xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
				   nhashmask);

	odst = xfrm_state_bydst;
	osrc = xfrm_state_bysrc;
	ospi = xfrm_state_byspi;
	ohashmask = xfrm_state_hmask;

	xfrm_state_bydst = ndst;
	xfrm_state_bysrc = nsrc;
	xfrm_state_byspi = nspi;
	xfrm_state_hmask = nhashmask;

	spin_unlock_bh(&xfrm_state_lock);

	/* Old tables are now unreachable; free them outside the lock. */
	osize = (ohashmask + 1) * sizeof(struct hlist_head);
	xfrm_hash_free(odst, osize);
	xfrm_hash_free(osrc, osize);
	xfrm_hash_free(ospi, osize);

out_unlock:
	mutex_unlock(&hash_resize_mutex);
}
171
/* Deferred hash-table grow, scheduled from xfrm_hash_grow_check(). */
static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);

/* Woken on state insert/flush/GC so key managers can re-poll. */
DECLARE_WAIT_QUEUE_HEAD(km_waitq);
EXPORT_SYMBOL(km_waitq);

/* Protects the per-family afinfo registration table below. */
static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];

/* Deferred destruction: dead states are parked on xfrm_state_gc_list
 * (under xfrm_state_gc_lock) and torn down by xfrm_state_gc_work. */
static struct work_struct xfrm_state_gc_work;
static HLIST_HEAD(xfrm_state_gc_list);
static DEFINE_SPINLOCK(xfrm_state_gc_lock);

int __xfrm_state_delete(struct xfrm_state *x);

/* Forward declarations; definitions are elsewhere in this file. */
static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);

/* Key-manager notification hooks, defined elsewhere in this file. */
int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
191
/* Final teardown of a state, called from the GC workqueue only.
 *
 * Order matters: both timers are synchronously stopped first (so no
 * handler can still reference x), then the per-state allocations are
 * released, and kfree(x) comes last.  kfree() on the possibly-NULL
 * algorithm/encap/coaddr pointers is safe.
 */
static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
	del_timer_sync(&x->timer);
	del_timer_sync(&x->rtimer);
	kfree(x->aalg);
	kfree(x->ealg);
	kfree(x->calg);
	kfree(x->encap);
	kfree(x->coaddr);
	if (x->mode)
		xfrm_put_mode(x->mode);
	if (x->type) {
		/* Let the protocol (ah/esp/...) release its private data. */
		x->type->destructor(x);
		xfrm_put_type(x->type);
	}
	security_xfrm_state_free(x);
	kfree(x);
}
210
/* GC workqueue handler: splice the pending list out under the lock,
 * then destroy each entry without holding any lock.
 *
 * Note: dead states are chained onto the GC list through their (now
 * unused) bydst link — see __xfrm_state_destroy().
 */
static void xfrm_state_gc_task(struct work_struct *data)
{
	struct xfrm_state *x;
	struct hlist_node *entry, *tmp;
	struct hlist_head gc_list;

	spin_lock_bh(&xfrm_state_gc_lock);
	/* Steal the whole list; new deaths start a fresh list. */
	gc_list.first = xfrm_state_gc_list.first;
	INIT_HLIST_HEAD(&xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);

	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
		xfrm_state_gc_destroy(x);

	wake_up(&km_waitq);
}
227
228 static inline unsigned long make_jiffies(long secs)
229 {
230         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
231                 return MAX_SCHEDULE_TIMEOUT-1;
232         else
233                 return secs*HZ;
234 }
235
/* Per-state lifetime timer.
 *
 * Runs with x->lock held (softirq context, hence plain spin_lock).
 * Checks hard add/use lifetimes (expire the state), then soft
 * lifetimes (warn the key manager once via km.dying), and reschedules
 * itself for the nearest remaining deadline.
 */
static void xfrm_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state*)data;
	unsigned long now = (unsigned long)xtime.tv_sec;
	long next = LONG_MAX;		/* seconds until the next event */
	int warn = 0;
	int err = 0;

	spin_lock(&x->lock);
	if (x->km.state == XFRM_STATE_DEAD)
		goto out;
	if (x->km.state == XFRM_STATE_EXPIRED)
		goto expired;
	if (x->lft.hard_add_expires_seconds) {
		long tmo = x->lft.hard_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (x->lft.hard_use_expires_seconds) {
		/* use_time == 0 means "never used yet": count from now. */
		long tmo = x->lft.hard_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	/* Soft-expire warning already sent; just reschedule. */
	if (x->km.dying)
		goto resched;
	if (x->lft.soft_add_expires_seconds) {
		long tmo = x->lft.soft_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}
	if (x->lft.soft_use_expires_seconds) {
		long tmo = x->lft.soft_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}

	x->km.dying = warn;
	if (warn)
		km_state_expired(x, 0, 0);	/* soft expire notification */
resched:
	if (next != LONG_MAX)
		mod_timer(&x->timer, jiffies + make_jiffies(next));

	goto out;

expired:
	/* Larval (ACQ, no SPI) states just flip to EXPIRED and linger
	 * briefly so waiters can observe the transition. */
	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
		x->km.state = XFRM_STATE_EXPIRED;
		wake_up(&km_waitq);
		next = 2;
		goto resched;
	}

	err = __xfrm_state_delete(x);
	if (!err && x->id.spi)
		km_state_expired(x, 1, 0);	/* hard expire notification */

	xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
		       AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);

out:
	spin_unlock(&x->lock);
}
311
/* Forward declaration; the replay timer body is defined later in the file. */
static void xfrm_replay_timer_handler(unsigned long data);
313
314 struct xfrm_state *xfrm_state_alloc(void)
315 {
316         struct xfrm_state *x;
317
318         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
319
320         if (x) {
321                 atomic_set(&x->refcnt, 1);
322                 atomic_set(&x->tunnel_users, 0);
323                 INIT_HLIST_NODE(&x->bydst);
324                 INIT_HLIST_NODE(&x->bysrc);
325                 INIT_HLIST_NODE(&x->byspi);
326                 init_timer(&x->timer);
327                 x->timer.function = xfrm_timer_handler;
328                 x->timer.data     = (unsigned long)x;
329                 init_timer(&x->rtimer);
330                 x->rtimer.function = xfrm_replay_timer_handler;
331                 x->rtimer.data     = (unsigned long)x;
332                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
333                 x->lft.soft_byte_limit = XFRM_INF;
334                 x->lft.soft_packet_limit = XFRM_INF;
335                 x->lft.hard_byte_limit = XFRM_INF;
336                 x->lft.hard_packet_limit = XFRM_INF;
337                 x->replay_maxage = 0;
338                 x->replay_maxdiff = 0;
339                 spin_lock_init(&x->lock);
340         }
341         return x;
342 }
343 EXPORT_SYMBOL(xfrm_state_alloc);
344
/* Last reference dropped: queue the state for deferred destruction.
 *
 * The state must already be DEAD (i.e. unhashed by __xfrm_state_delete),
 * which is why its bydst link can be reused here to chain it onto the
 * GC list.  Actual teardown happens in xfrm_state_gc_task().
 */
void __xfrm_state_destroy(struct xfrm_state *x)
{
	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);

	spin_lock_bh(&xfrm_state_gc_lock);
	hlist_add_head(&x->bydst, &xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);
	schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);
355
/* Mark a state DEAD and unhash it from all tables.
 *
 * Caller holds x->lock; xfrm_state_lock is taken nested inside it
 * (plain spin_lock: BHs are already disabled by the caller).
 * Returns 0 if this call performed the deletion, -ESRCH if the state
 * was already dead.
 */
int __xfrm_state_delete(struct xfrm_state *x)
{
	int err = -ESRCH;

	if (x->km.state != XFRM_STATE_DEAD) {
		x->km.state = XFRM_STATE_DEAD;
		spin_lock(&xfrm_state_lock);
		hlist_del(&x->bydst);
		hlist_del(&x->bysrc);
		if (x->id.spi)
			hlist_del(&x->byspi);
		xfrm_state_num--;
		spin_unlock(&xfrm_state_lock);

		/* All xfrm_state objects are created by xfrm_state_alloc.
		 * The xfrm_state_alloc call gives a reference, and that
		 * is what we are dropping here.
		 */
		__xfrm_state_put(x);
		err = 0;
	}

	return err;
}
EXPORT_SYMBOL(__xfrm_state_delete);
381
/* Locked wrapper around __xfrm_state_delete(); see above for the
 * return convention (0 on deletion, -ESRCH if already dead). */
int xfrm_state_delete(struct xfrm_state *x)
{
	int err;

	spin_lock_bh(&x->lock);
	err = __xfrm_state_delete(x);
	spin_unlock_bh(&x->lock);

	return err;
}
EXPORT_SYMBOL(xfrm_state_delete);
393
/* Delete every non-kernel-owned state matching @proto, emitting an
 * audit record per deletion.
 *
 * xfrm_state_lock must be dropped around xfrm_state_delete() (which
 * takes x->lock, the outer lock in the ordering), so each deletion
 * restarts the current bucket scan from the head — the chain may have
 * changed while the lock was released.
 */
void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
{
	int i;
	int err = 0;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i <= xfrm_state_hmask; i++) {
		struct hlist_node *entry;
		struct xfrm_state *x;
restart:
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (!xfrm_state_kern(x) &&
			    xfrm_id_proto_match(x->id.proto, proto)) {
				/* Hold a ref so x survives the unlock window. */
				xfrm_state_hold(x);
				spin_unlock_bh(&xfrm_state_lock);

				err = xfrm_state_delete(x);
				xfrm_audit_log(audit_info->loginuid,
					       audit_info->secid,
					       AUDIT_MAC_IPSEC_DELSA,
					       err ? 0 : 1, NULL, x);
				xfrm_state_put(x);

				spin_lock_bh(&xfrm_state_lock);
				goto restart;
			}
		}
	}
	spin_unlock_bh(&xfrm_state_lock);
	wake_up(&km_waitq);
}
EXPORT_SYMBOL(xfrm_state_flush);
426
427 static int
428 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
429                   struct xfrm_tmpl *tmpl,
430                   xfrm_address_t *daddr, xfrm_address_t *saddr,
431                   unsigned short family)
432 {
433         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
434         if (!afinfo)
435                 return -1;
436         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
437         xfrm_state_put_afinfo(afinfo);
438         return 0;
439 }
440
441 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
442 {
443         unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
444         struct xfrm_state *x;
445         struct hlist_node *entry;
446
447         hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
448                 if (x->props.family != family ||
449                     x->id.spi       != spi ||
450                     x->id.proto     != proto)
451                         continue;
452
453                 switch (family) {
454                 case AF_INET:
455                         if (x->id.daddr.a4 != daddr->a4)
456                                 continue;
457                         break;
458                 case AF_INET6:
459                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
460                                              (struct in6_addr *)
461                                              x->id.daddr.a6))
462                                 continue;
463                         break;
464                 };
465
466                 xfrm_state_hold(x);
467                 return x;
468         }
469
470         return NULL;
471 }
472
473 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
474 {
475         unsigned int h = xfrm_src_hash(daddr, saddr, family);
476         struct xfrm_state *x;
477         struct hlist_node *entry;
478
479         hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
480                 if (x->props.family != family ||
481                     x->id.proto     != proto)
482                         continue;
483
484                 switch (family) {
485                 case AF_INET:
486                         if (x->id.daddr.a4 != daddr->a4 ||
487                             x->props.saddr.a4 != saddr->a4)
488                                 continue;
489                         break;
490                 case AF_INET6:
491                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
492                                              (struct in6_addr *)
493                                              x->id.daddr.a6) ||
494                             !ipv6_addr_equal((struct in6_addr *)saddr,
495                                              (struct in6_addr *)
496                                              x->props.saddr.a6))
497                                 continue;
498                         break;
499                 };
500
501                 xfrm_state_hold(x);
502                 return x;
503         }
504
505         return NULL;
506 }
507
508 static inline struct xfrm_state *
509 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
510 {
511         if (use_spi)
512                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
513                                            x->id.proto, family);
514         else
515                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
516                                                   &x->props.saddr,
517                                                   x->id.proto, family);
518 }
519
520 static void xfrm_hash_grow_check(int have_hash_collision)
521 {
522         if (have_hash_collision &&
523             (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
524             xfrm_state_num > xfrm_state_hmask)
525                 schedule_work(&xfrm_hash_work);
526 }
527
/* Resolve the SA to use for a flow, given the template @tmpl chosen by
 * policy @pol.  Returns a held state (possibly a freshly created
 * larval ACQ state after querying the key manager), or NULL with *err
 * set: -EAGAIN while an acquire is in progress, -ESRCH/-EEXIST/-ENOMEM
 * on the respective failures.  Runs entirely under xfrm_state_lock.
 */
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
		struct flowi *fl, struct xfrm_tmpl *tmpl,
		struct xfrm_policy *pol, int *err,
		unsigned short family)
{
	unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
	struct hlist_node *entry;
	struct xfrm_state *x, *x0;
	int acquire_in_progress = 0;
	int error = 0;
	struct xfrm_state *best = NULL;

	spin_lock_bh(&xfrm_state_lock);
	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.family == family &&
		    x->props.reqid == tmpl->reqid &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_state_addr_check(x, daddr, saddr, family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
			/* Resolution logic:
			   1. There is a valid state with matching selector.
			      Done.
			   2. Valid state with inappropriate selector. Skip.

			   Entering area of "sysdeps".

			   3. If state is not valid, selector is temporary,
			      it selects only session which triggered
			      previous resolution. Key manager will do
			      something to install a state with proper
			      selector.
			 */
			if (x->km.state == XFRM_STATE_VALID) {
				if (!xfrm_selector_match(&x->sel, fl, family) ||
				    !security_xfrm_state_pol_flow_match(x, pol, fl))
					continue;
				/* Prefer non-dying states, then the newest. */
				if (!best ||
				    best->km.dying > x->km.dying ||
				    (best->km.dying == x->km.dying &&
				     best->curlft.add_time < x->curlft.add_time))
					best = x;
			} else if (x->km.state == XFRM_STATE_ACQ) {
				acquire_in_progress = 1;
			} else if (x->km.state == XFRM_STATE_ERROR ||
				   x->km.state == XFRM_STATE_EXPIRED) {
				if (xfrm_selector_match(&x->sel, fl, family) &&
				    security_xfrm_state_pol_flow_match(x, pol, fl))
					error = -ESRCH;
			}
		}
	}

	x = best;
	/* Nothing usable found: consider creating a larval ACQ state. */
	if (!x && !error && !acquire_in_progress) {
		/* A state with the template's exact SPI already exists
		 * (but failed the match above) — report the conflict. */
		if (tmpl->id.spi &&
		    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
					      tmpl->id.proto, family)) != NULL) {
			xfrm_state_put(x0);
			error = -EEXIST;
			goto out;
		}
		x = xfrm_state_alloc();
		if (x == NULL) {
			error = -ENOMEM;
			goto out;
		}
		/* Initialize temporary selector matching only
		 * to current session. */
		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);

		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
		if (error) {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			goto out;
		}

		/* Ask the key manager to negotiate a real SA; meanwhile
		 * hash the larval state with an ACQ-expiry timer. */
		if (km_query(x, tmpl, pol) == 0) {
			x->km.state = XFRM_STATE_ACQ;
			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
			h = xfrm_src_hash(daddr, saddr, family);
			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
			if (x->id.spi) {
				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
				hlist_add_head(&x->byspi, xfrm_state_byspi+h);
			}
			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
			add_timer(&x->timer);
			xfrm_state_num++;
			xfrm_hash_grow_check(x->bydst.next != NULL);
		} else {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			error = -ESRCH;
		}
	}
out:
	if (x)
		xfrm_state_hold(x);
	else
		*err = acquire_in_progress ? -EAGAIN : error;
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
638
/* Hash @x into all applicable tables and start its timers.
 * Caller must hold xfrm_state_lock.
 */
static void __xfrm_state_insert(struct xfrm_state *x)
{
	unsigned int h;

	/* New genid invalidates cached bundles built on older states. */
	x->genid = ++xfrm_state_genid;

	h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
			  x->props.reqid, x->props.family);
	hlist_add_head(&x->bydst, xfrm_state_bydst+h);

	h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
	hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);

	if (x->id.spi) {
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
				  x->props.family);

		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
	}

	/* Kick the lifetime timer soon; it will compute the real deadline. */
	mod_timer(&x->timer, jiffies + HZ);
	if (x->replay_maxage)
		mod_timer(&x->rtimer, jiffies + x->replay_maxage);

	wake_up(&km_waitq);

	xfrm_state_num++;

	/* Non-NULL next means we landed on a non-empty chain: collision. */
	xfrm_hash_grow_check(x->bydst.next != NULL);
}
669
670 /* xfrm_state_lock is held */
671 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
672 {
673         unsigned short family = xnew->props.family;
674         u32 reqid = xnew->props.reqid;
675         struct xfrm_state *x;
676         struct hlist_node *entry;
677         unsigned int h;
678
679         h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
680         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
681                 if (x->props.family     == family &&
682                     x->props.reqid      == reqid &&
683                     !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
684                     !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
685                         x->genid = xfrm_state_genid;
686         }
687 }
688
/* Public insert: bump genids of clashing states, then hash @x in,
 * all under xfrm_state_lock. */
void xfrm_state_insert(struct xfrm_state *x)
{
	spin_lock_bh(&xfrm_state_lock);
	__xfrm_state_bump_genids(x);
	__xfrm_state_insert(x);
	spin_unlock_bh(&xfrm_state_lock);
}
EXPORT_SYMBOL(xfrm_state_insert);
697
/* xfrm_state_lock is held */
/* Find an existing larval (ACQ, SPI-less) state for the given key, or
 * — if @create — build one with a selector pinned to the exact
 * src/dst addresses and an ACQ-expiry timer.
 *
 * Returns the state with an extra reference held (xfrm_state_hold on
 * the found path; on the create path the alloc reference plus an
 * explicit hold, since the hash tables keep one), or NULL.
 */
static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
{
	unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
	struct hlist_node *entry;
	struct xfrm_state *x;

	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.reqid  != reqid ||
		    x->props.mode   != mode ||
		    x->props.family != family ||
		    x->km.state     != XFRM_STATE_ACQ ||
		    x->id.spi       != 0 ||
		    x->id.proto     != proto)
			continue;

		switch (family) {
		case AF_INET:
			if (x->id.daddr.a4    != daddr->a4 ||
			    x->props.saddr.a4 != saddr->a4)
				continue;
			break;
		case AF_INET6:
			if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
					     (struct in6_addr *)daddr) ||
			    !ipv6_addr_equal((struct in6_addr *)
					     x->props.saddr.a6,
					     (struct in6_addr *)saddr))
				continue;
			break;
		};

		xfrm_state_hold(x);
		return x;
	}

	if (!create)
		return NULL;

	x = xfrm_state_alloc();
	if (likely(x)) {
		switch (family) {
		case AF_INET:
			/* Selector covers exactly this host pair. */
			x->sel.daddr.a4 = daddr->a4;
			x->sel.saddr.a4 = saddr->a4;
			x->sel.prefixlen_d = 32;
			x->sel.prefixlen_s = 32;
			x->props.saddr.a4 = saddr->a4;
			x->id.daddr.a4 = daddr->a4;
			break;

		case AF_INET6:
			ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
				       (struct in6_addr *)daddr);
			ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
				       (struct in6_addr *)saddr);
			x->sel.prefixlen_d = 128;
			x->sel.prefixlen_s = 128;
			ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
				       (struct in6_addr *)saddr);
			ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
				       (struct in6_addr *)daddr);
			break;
		};

		x->km.state = XFRM_STATE_ACQ;
		x->id.proto = proto;
		x->props.family = family;
		x->props.mode = mode;
		x->props.reqid = reqid;
		x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
		/* Extra ref for the caller; the tables own the alloc ref. */
		xfrm_state_hold(x);
		x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
		add_timer(&x->timer);
		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
		h = xfrm_src_hash(daddr, saddr, family);
		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
		wake_up(&km_waitq);

		xfrm_state_num++;

		xfrm_hash_grow_check(x->bydst.next != NULL);
	}

	return x;
}
784
/* Forward declaration; defined later in the file. */
static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
786
/* Insert a fully-specified state, failing with -EEXIST if an
 * equivalent state is already present.
 *
 * Any matching larval ACQ state (found by the key-manager sequence
 * number or by key) is deleted after the insert, outside
 * xfrm_state_lock — xfrm_state_delete() takes x1->lock, which nests
 * outside xfrm_state_lock.
 */
int xfrm_state_add(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int family;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	family = x->props.family;

	spin_lock_bh(&xfrm_state_lock);

	x1 = __xfrm_state_locate(x, use_spi, family);
	if (x1) {
		xfrm_state_put(x1);
		x1 = NULL;
		err = -EEXIST;
		goto out;
	}

	if (use_spi && x->km.seq) {
		x1 = __xfrm_find_acq_byseq(x->km.seq);
		/* The seq-matched state must also match proto and daddr,
		 * otherwise it is unrelated — drop it. */
		if (x1 && ((x1->id.proto != x->id.proto) ||
		    xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) {
			xfrm_state_put(x1);
			x1 = NULL;
		}
	}

	if (use_spi && !x1)
		x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
				     x->id.proto,
				     &x->id.daddr, &x->props.saddr, 0);

	__xfrm_state_bump_genids(x);
	__xfrm_state_insert(x);
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	/* Retire the larval state the new SA replaces, if any. */
	if (x1) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
	}

	return err;
}
EXPORT_SYMBOL(xfrm_state_add);
835
/* Update an installed state with new data from @x, or insert @x itself
 * when the matching state is still larval (XFRM_STATE_ACQ).
 *
 * Returns 0 on success, -ESRCH if no matching state exists, -EEXIST if
 * the match is kernel-internal (xfrm_state_kern) and may not be
 * replaced, or -EINVAL if the match is no longer XFRM_STATE_VALID.
 */
int xfrm_state_update(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	spin_lock_bh(&xfrm_state_lock);
	x1 = __xfrm_state_locate(x, use_spi, x->props.family);

	err = -ESRCH;
	if (!x1)
		goto out;

	if (xfrm_state_kern(x1)) {
		xfrm_state_put(x1);
		err = -EEXIST;
		goto out;
	}

	if (x1->km.state == XFRM_STATE_ACQ) {
		/* Larval match: insert the new state; x == NULL below
		 * signals that x1 must be deleted in its favour. */
		__xfrm_state_insert(x);
		x = NULL;
	}
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	if (err)
		return err;

	if (!x) {
		/* The new state replaced the larval one: delete it and
		 * drop the lookup reference. */
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
		return 0;
	}

	err = -EINVAL;
	spin_lock_bh(&x1->lock);
	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		/* Copy the updatable attributes into the installed
		 * state. */
		if (x->encap && x1->encap)
			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
		if (x->coaddr && x1->coaddr) {
			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
		}
		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
		x1->km.dying = 0;

		/* Re-arm the state timer and re-evaluate the new
		 * lifetime limits immediately if already in use. */
		mod_timer(&x1->timer, jiffies + HZ);
		if (x1->curlft.use_time)
			xfrm_state_check_expire(x1);

		err = 0;
	}
	spin_unlock_bh(&x1->lock);

	xfrm_state_put(x1);

	return err;
}
EXPORT_SYMBOL(xfrm_state_update);
899
/* Check the current lifetime counters of @x against its limits.
 *
 * Stamps the first-use time on first call.  If a hard byte/packet limit
 * has been reached the state is marked EXPIRED, its timer is fired
 * immediately, and -EINVAL is returned.  Crossing a soft limit sends a
 * km_state_expired(hard=0) notification once (guarded by km.dying).
 * NOTE(review): callers in this file hold x->lock; presumably that is
 * the expected contract -- confirm at other call sites.
 */
int xfrm_state_check_expire(struct xfrm_state *x)
{
	if (!x->curlft.use_time)
		x->curlft.use_time = (unsigned long)xtime.tv_sec;

	if (x->km.state != XFRM_STATE_VALID)
		return -EINVAL;

	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
	    x->curlft.packets >= x->lft.hard_packet_limit) {
		/* Hard limit hit: expire now; the timer does the rest. */
		x->km.state = XFRM_STATE_EXPIRED;
		mod_timer(&x->timer, jiffies);
		return -EINVAL;
	}

	if (!x->km.dying &&
	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
	     x->curlft.packets >= x->lft.soft_packet_limit)) {
		/* Soft limit: notify key managers exactly once. */
		x->km.dying = 1;
		km_state_expired(x, 0, 0);
	}
	return 0;
}
EXPORT_SYMBOL(xfrm_state_check_expire);
924
925 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
926 {
927         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
928                 - skb_headroom(skb);
929
930         if (nhead > 0)
931                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
932
933         /* Check tail too... */
934         return 0;
935 }
936
/* Validate @x for output use: not expired, and @skb has sufficient
 * headroom for its transforms. */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int err = xfrm_state_check_expire(x);

	if (err < 0)
		return err;

	return xfrm_state_check_space(x, skb);
}
EXPORT_SYMBOL(xfrm_state_check);
947
948 struct xfrm_state *
949 xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
950                   unsigned short family)
951 {
952         struct xfrm_state *x;
953
954         spin_lock_bh(&xfrm_state_lock);
955         x = __xfrm_state_lookup(daddr, spi, proto, family);
956         spin_unlock_bh(&xfrm_state_lock);
957         return x;
958 }
959 EXPORT_SYMBOL(xfrm_state_lookup);
960
961 struct xfrm_state *
962 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
963                          u8 proto, unsigned short family)
964 {
965         struct xfrm_state *x;
966
967         spin_lock_bh(&xfrm_state_lock);
968         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
969         spin_unlock_bh(&xfrm_state_lock);
970         return x;
971 }
972 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
973
974 struct xfrm_state *
975 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
976               xfrm_address_t *daddr, xfrm_address_t *saddr, 
977               int create, unsigned short family)
978 {
979         struct xfrm_state *x;
980
981         spin_lock_bh(&xfrm_state_lock);
982         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
983         spin_unlock_bh(&xfrm_state_lock);
984
985         return x;
986 }
987 EXPORT_SYMBOL(xfrm_find_acq);
988
989 #ifdef CONFIG_XFRM_SUB_POLICY
990 int
991 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
992                unsigned short family)
993 {
994         int err = 0;
995         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
996         if (!afinfo)
997                 return -EAFNOSUPPORT;
998
999         spin_lock_bh(&xfrm_state_lock);
1000         if (afinfo->tmpl_sort)
1001                 err = afinfo->tmpl_sort(dst, src, n);
1002         spin_unlock_bh(&xfrm_state_lock);
1003         xfrm_state_put_afinfo(afinfo);
1004         return err;
1005 }
1006 EXPORT_SYMBOL(xfrm_tmpl_sort);
1007
1008 int
1009 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1010                 unsigned short family)
1011 {
1012         int err = 0;
1013         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1014         if (!afinfo)
1015                 return -EAFNOSUPPORT;
1016
1017         spin_lock_bh(&xfrm_state_lock);
1018         if (afinfo->state_sort)
1019                 err = afinfo->state_sort(dst, src, n);
1020         spin_unlock_bh(&xfrm_state_lock);
1021         xfrm_state_put_afinfo(afinfo);
1022         return err;
1023 }
1024 EXPORT_SYMBOL(xfrm_state_sort);
1025 #endif
1026
/* Silly enough, but I'm too lazy to build a resolution list. */
1028
/* Linear scan of the whole bydst hash table for a larval
 * (XFRM_STATE_ACQ) state carrying the given key-manager sequence
 * number.  Returns it with a reference held, or NULL.  Callers hold
 * xfrm_state_lock.
 */
static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
{
	int i;

	for (i = 0; i <= xfrm_state_hmask; i++) {
		struct hlist_node *entry;
		struct xfrm_state *x;

		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (x->km.seq == seq &&
			    x->km.state == XFRM_STATE_ACQ) {
				xfrm_state_hold(x);
				return x;
			}
		}
	}
	return NULL;
}
1047
1048 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1049 {
1050         struct xfrm_state *x;
1051
1052         spin_lock_bh(&xfrm_state_lock);
1053         x = __xfrm_find_acq_byseq(seq);
1054         spin_unlock_bh(&xfrm_state_lock);
1055         return x;
1056 }
1057 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1058
1059 u32 xfrm_get_acqseq(void)
1060 {
1061         u32 res;
1062         static u32 acqseq;
1063         static DEFINE_SPINLOCK(acqseq_lock);
1064
1065         spin_lock_bh(&acqseq_lock);
1066         res = (++acqseq ? : ++acqseq);
1067         spin_unlock_bh(&acqseq_lock);
1068         return res;
1069 }
1070 EXPORT_SYMBOL(xfrm_get_acqseq);
1071
/* Assign an SPI to @x: either the fixed value @minspi (when it equals
 * @maxspi) or a random unused value in [minspi, maxspi].  On success
 * the state is hashed into the byspi table and km_waitq is woken; if
 * every probed value is already taken, x->id.spi is simply left zero.
 */
void
xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
{
	unsigned int h;
	struct xfrm_state *x0;

	/* Already has an SPI -- nothing to do. */
	if (x->id.spi)
		return;

	if (minspi == maxspi) {
		/* One specific SPI requested: take it only if free. */
		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
		if (x0) {
			xfrm_state_put(x0);
			return;
		}
		x->id.spi = minspi;
	} else {
		u32 spi = 0;
		u32 low = ntohl(minspi);
		u32 high = ntohl(maxspi);
		/* Up to range-size random probes for an unused SPI. */
		for (h=0; h<high-low+1; h++) {
			spi = low + net_random()%(high-low+1);
			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
			if (x0 == NULL) {
				x->id.spi = htonl(spi);
				break;
			}
			xfrm_state_put(x0);
		}
	}
	if (x->id.spi) {
		/* Make the state findable by SPI and wake any waiters. */
		spin_lock_bh(&xfrm_state_lock);
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
		spin_unlock_bh(&xfrm_state_lock);
		wake_up(&km_waitq);
	}
}
EXPORT_SYMBOL(xfrm_alloc_spi);
1111
/* Walk every state whose protocol matches @proto under xfrm_state_lock,
 * invoking @func once per matching state.  The callback runs one entry
 * behind the scan so the final invocation can pass a count of 0,
 * letting the callback recognise the last entry; earlier invocations
 * receive the entry's 1-based position.  Returns -ENOENT if nothing
 * matched, otherwise the first non-zero callback result (or 0).
 */
int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
		    void *data)
{
	int i;
	struct xfrm_state *x, *last = NULL;
	struct hlist_node *entry;
	int count = 0;
	int err = 0;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i <= xfrm_state_hmask; i++) {
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (!xfrm_id_proto_match(x->id.proto, proto))
				continue;
			if (last) {
				/* Deliver the previous match; current
				 * one is held back as the new "last". */
				err = func(last, count, data);
				if (err)
					goto out;
			}
			last = x;
			count++;
		}
	}
	if (count == 0) {
		err = -ENOENT;
		goto out;
	}
	/* Final entry is delivered with count 0. */
	err = func(last, 0, data);
out:
	spin_unlock_bh(&xfrm_state_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_walk);
1145
1146
/* Decide whether a replay-counter change should be reported to the key
 * managers and, if so, emit an XFRM_MSG_NEWAE event and snapshot the
 * counters into x->preplay for the next comparison. */
void xfrm_replay_notify(struct xfrm_state *x, int event)
{
	struct km_event c;
	/* we send notify messages in case
	 *  1. we updated one of the sequence numbers, and the seqno difference
	 *     is at least x->replay_maxdiff, in this case we also update the
	 *     timeout of our timer function
	 *  2. if x->replay_maxage has elapsed since last update,
	 *     and there were changes
	 *
	 *  The state structure must be locked!
	 */

	switch (event) {
	case XFRM_REPLAY_UPDATE:
		if (x->replay_maxdiff &&
		    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
			/* Change below threshold: only report now if a
			 * timer notification was deferred earlier. */
			if (x->xflags & XFRM_TIME_DEFER)
				event = XFRM_REPLAY_TIMEOUT;
			else
				return;
		}

		break;

	case XFRM_REPLAY_TIMEOUT:
		/* Nothing changed since the last notify: defer again. */
		if ((x->replay.seq == x->preplay.seq) &&
		    (x->replay.bitmap == x->preplay.bitmap) &&
		    (x->replay.oseq == x->preplay.oseq)) {
			x->xflags |= XFRM_TIME_DEFER;
			return;
		}

		break;
	}

	memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
	c.event = XFRM_MSG_NEWAE;
	c.data.aevent = event;
	km_state_notify(x, &c);

	/* Re-arm the replay timer; clear the defer flag only if the
	 * timer was not already pending. */
	if (x->replay_maxage &&
	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
		x->xflags &= ~XFRM_TIME_DEFER;
}
EXPORT_SYMBOL(xfrm_replay_notify);
1194
/* Replay timer (x->rtimer) callback: while the state is still VALID,
 * either emit the periodic replay notification (if async events are
 * enabled) or set XFRM_TIME_DEFER so the next counter update sends
 * one. */
static void xfrm_replay_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state*)data;

	spin_lock(&x->lock);

	if (x->km.state == XFRM_STATE_VALID) {
		if (xfrm_aevent_is_on())
			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
		else
			x->xflags |= XFRM_TIME_DEFER;
	}

	spin_unlock(&x->lock);
}
1210
/* Anti-replay check for inbound sequence number @net_seq (network byte
 * order).  Returns 0 if the sequence number is acceptable, -EINVAL if
 * it is zero, falls behind the replay window, or was already seen;
 * stats are bumped for the latter two cases.
 */
int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
{
	u32 diff;
	u32 seq = ntohl(net_seq);

	/* Sequence number 0 is never valid. */
	if (unlikely(seq == 0))
		return -EINVAL;

	/* Ahead of everything seen so far: always acceptable. */
	if (likely(seq > x->replay.seq))
		return 0;

	diff = x->replay.seq - seq;
	/* Effective window is capped by the bitmap width in bits. */
	if (diff >= min_t(unsigned int, x->props.replay_window,
			  sizeof(x->replay.bitmap) * 8)) {
		x->stats.replay_window++;
		return -EINVAL;
	}

	/* Bit already set => this sequence number was seen before. */
	if (x->replay.bitmap & (1U << diff)) {
		x->stats.replay++;
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(xfrm_replay_check);
1236
/* Record inbound sequence number @net_seq in the replay state: slide
 * the window forward for a new highest sequence number, or set the
 * corresponding bitmap bit for an older in-window one.  Notifies the
 * key managers when async events are enabled. */
void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
{
	u32 diff;
	u32 seq = ntohl(net_seq);

	if (seq > x->replay.seq) {
		/* New right edge: shift the bitmap and mark this seq
		 * (bit 0); a jump past the window clears it entirely. */
		diff = seq - x->replay.seq;
		if (diff < x->props.replay_window)
			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
		else
			x->replay.bitmap = 1;
		x->replay.seq = seq;
	} else {
		/* Older packet inside the window: just mark its bit. */
		diff = x->replay.seq - seq;
		x->replay.bitmap |= (1U << diff);
	}

	if (xfrm_aevent_is_on())
		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
EXPORT_SYMBOL(xfrm_replay_advance);
1258
/* List of registered key managers and the rwlock protecting it:
 * notification paths take it for read, (un)register for write. */
static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
static DEFINE_RWLOCK(xfrm_km_lock);
1261
1262 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1263 {
1264         struct xfrm_mgr *km;
1265
1266         read_lock(&xfrm_km_lock);
1267         list_for_each_entry(km, &xfrm_km_list, list)
1268                 if (km->notify_policy)
1269                         km->notify_policy(xp, dir, c);
1270         read_unlock(&xfrm_km_lock);
1271 }
1272
1273 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1274 {
1275         struct xfrm_mgr *km;
1276         read_lock(&xfrm_km_lock);
1277         list_for_each_entry(km, &xfrm_km_list, list)
1278                 if (km->notify)
1279                         km->notify(x, c);
1280         read_unlock(&xfrm_km_lock);
1281 }
1282
1283 EXPORT_SYMBOL(km_policy_notify);
1284 EXPORT_SYMBOL(km_state_notify);
1285
1286 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1287 {
1288         struct km_event c;
1289
1290         c.data.hard = hard;
1291         c.pid = pid;
1292         c.event = XFRM_MSG_EXPIRE;
1293         km_state_notify(x, &c);
1294
1295         if (hard)
1296                 wake_up(&km_waitq);
1297 }
1298
1299 EXPORT_SYMBOL(km_state_expired);
/*
 * Ask every registered key manager to acquire an SA for (x, t, pol).
 * We send to all registered managers regardless of failure; we are
 * happy with one success.  Returns 0 if any manager accepted the
 * request, otherwise -EINVAL.  Note: ->acquire is called without a
 * NULL check, so every registered manager is expected to provide it.
 */
int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
{
	int err = -EINVAL, acqret;
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
		if (!acqret)
			err = acqret;
	}
	read_unlock(&xfrm_km_lock);
	return err;
}
EXPORT_SYMBOL(km_query);
1319
/* Report an address/port mapping change (UDP encapsulation) for @x to
 * the key managers, stopping at the first manager whose new_mapping()
 * succeeds.  Returns 0 on success, otherwise the last error seen
 * (-EINVAL if no manager implements new_mapping). */
int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
{
	int err = -EINVAL;
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		if (km->new_mapping)
			err = km->new_mapping(x, ipaddr, sport);
		if (!err)
			break;
	}
	read_unlock(&xfrm_km_lock);
	return err;
}
EXPORT_SYMBOL(km_new_mapping);
1336
1337 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1338 {
1339         struct km_event c;
1340
1341         c.data.hard = hard;
1342         c.pid = pid;
1343         c.event = XFRM_MSG_POLEXPIRE;
1344         km_policy_notify(pol, dir, &c);
1345
1346         if (hard)
1347                 wake_up(&km_waitq);
1348 }
1349 EXPORT_SYMBOL(km_policy_expired);
1350
1351 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1352 {
1353         int err = -EINVAL;
1354         int ret;
1355         struct xfrm_mgr *km;
1356
1357         read_lock(&xfrm_km_lock);
1358         list_for_each_entry(km, &xfrm_km_list, list) {
1359                 if (km->report) {
1360                         ret = km->report(proto, sel, addr);
1361                         if (!ret)
1362                                 err = ret;
1363                 }
1364         }
1365         read_unlock(&xfrm_km_lock);
1366         return err;
1367 }
1368 EXPORT_SYMBOL(km_report);
1369
/* setsockopt() back end: build a per-socket policy from the
 * user-supplied @optval blob by offering it to each registered key
 * manager's compile_policy() until one accepts it (err >= 0), then
 * attach the resulting policy to @sk.  Returns 0 on success or a
 * negative errno.
 */
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
	int err;
	u8 *data;
	struct xfrm_mgr *km;
	struct xfrm_policy *pol = NULL;

	/* Reject empty or over-sized option blobs up front. */
	if (optlen <= 0 || optlen > PAGE_SIZE)
		return -EMSGSIZE;

	data = kmalloc(optlen, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	err = -EFAULT;
	if (copy_from_user(data, optval, optlen))
		goto out;

	err = -EINVAL;
	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		pol = km->compile_policy(sk, optname, data,
					 optlen, &err);
		if (err >= 0)
			break;
	}
	read_unlock(&xfrm_km_lock);

	if (err >= 0) {
		/* On success err holds the policy direction. */
		xfrm_sk_policy_insert(sk, err, pol);
		xfrm_pol_put(pol);
		err = 0;
	}

out:
	kfree(data);
	return err;
}
EXPORT_SYMBOL(xfrm_user_policy);
1409
/* Register a key manager; it will receive all subsequent km_*
 * notifications.  Always returns 0. */
int xfrm_register_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_add_tail(&km->list, &xfrm_km_list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
EXPORT_SYMBOL(xfrm_register_km);
1418
/* Unregister a previously registered key manager.  Always returns 0. */
int xfrm_unregister_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_del(&km->list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
EXPORT_SYMBOL(xfrm_unregister_km);
1427
/* Register per-address-family state operations.  Returns -EINVAL for a
 * NULL argument, -EAFNOSUPPORT for an out-of-range family, -ENOBUFS if
 * the family slot is already taken, or 0 on success. */
int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock_bh(&xfrm_state_afinfo_lock);
	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
		err = -ENOBUFS;
	else
		xfrm_state_afinfo[afinfo->family] = afinfo;
	write_unlock_bh(&xfrm_state_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_register_afinfo);
1444
/* Unregister per-address-family state operations.  The slot is cleared
 * only if it holds exactly @afinfo; a different registered entry yields
 * -EINVAL (an empty slot is treated as success).  Returns 0 or a
 * negative errno. */
int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock_bh(&xfrm_state_afinfo_lock);
	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else
			xfrm_state_afinfo[afinfo->family] = NULL;
	}
	write_unlock_bh(&xfrm_state_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1463
/* Look up the afinfo for @family.  On success, returns with
 * xfrm_state_afinfo_lock held for read -- the caller must release it
 * via xfrm_state_put_afinfo().  Returns NULL (lock already released)
 * if the family is out of range or not registered. */
static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
{
	struct xfrm_state_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_state_afinfo_lock);
	afinfo = xfrm_state_afinfo[family];
	if (unlikely(!afinfo))
		read_unlock(&xfrm_state_afinfo_lock);
	return afinfo;
}
1475
/* Release the read lock taken by a successful xfrm_state_get_afinfo().
 * @afinfo itself is unused; it documents the pairing. */
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
	read_unlock(&xfrm_state_afinfo_lock);
}
1480
/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
/* Detach and drop @x's reference on its inner tunnel state.  When
 * tunnel_users is exactly 2 -- presumably ours plus the owner's; TODO
 * confirm against the xfrm4/xfrm6 tunnel code -- the tunnel state is
 * deleted first. */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
	if (x->tunnel) {
		struct xfrm_state *t = x->tunnel;

		if (atomic_read(&t->tunnel_users) == 2)
			xfrm_state_delete(t);
		atomic_dec(&t->tunnel_users);
		xfrm_state_put(t);
		x->tunnel = NULL;
	}
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1495
/*
 * This function is NOT optimal.  For example, with ESP it will give an
 * MTU that's usually two bytes short of being optimal.  However, it will
 * usually give an answer that's a multiple of 4 provided the input is
 * also a multiple of 4.
 */
/* Compute the largest payload size that still fits in @mtu once this
 * state's transform overhead is added.  Iterates because the overhead
 * reported by x->type->get_max_size() may depend on the payload size.
 * Never returns less than 68 (the minimum IPv4 MTU). */
int xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
	int res = mtu;

	res -= x->props.header_len;

	for (;;) {
		int m = res;

		if (m < 68)
			return 68;

		spin_lock_bh(&x->lock);
		if (x->km.state == XFRM_STATE_VALID &&
		    x->type && x->type->get_max_size)
			m = x->type->get_max_size(x, m);
		else
			m += x->props.header_len;
		spin_unlock_bh(&x->lock);

		if (m <= mtu)
			break;
		/* Too big: shrink the candidate by the overshoot. */
		res -= (m - mtu);
	}

	return res;
}
1529
/* Finish initialising @x: apply per-family init flags, bind the
 * transform type for x->id.proto and the mode for x->props.mode, run
 * the type's init_state(), then mark the state VALID.  Returns 0 on
 * success or a negative errno, in which case the state is left
 * not-VALID. */
int xfrm_init_state(struct xfrm_state *x)
{
	struct xfrm_state_afinfo *afinfo;
	int family = x->props.family;
	int err;

	err = -EAFNOSUPPORT;
	afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		goto error;

	err = 0;
	if (afinfo->init_flags)
		err = afinfo->init_flags(x);

	xfrm_state_put_afinfo(afinfo);

	if (err)
		goto error;

	err = -EPROTONOSUPPORT;
	x->type = xfrm_get_type(x->id.proto, family);
	if (x->type == NULL)
		goto error;

	err = x->type->init_state(x);
	if (err)
		goto error;

	x->mode = xfrm_get_mode(x->props.mode, family);
	if (x->mode == NULL)
		goto error;

	x->km.state = XFRM_STATE_VALID;

	/* Success also falls through here with err == 0. */
error:
	return err;
}

EXPORT_SYMBOL(xfrm_init_state);
1570  
/* Boot-time setup: allocate the bydst/bysrc/byspi hash tables with an
 * initial 8 buckets each (hmask = buckets - 1) and initialise the
 * state garbage-collection work. */
void __init xfrm_state_init(void)
{
	unsigned int sz;

	sz = sizeof(struct hlist_head) * 8;

	xfrm_state_bydst = xfrm_hash_alloc(sz);
	xfrm_state_bysrc = xfrm_hash_alloc(sz);
	xfrm_state_byspi = xfrm_hash_alloc(sz);
	if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
		panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);

	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
}
1586