vserver 1.9.5.x5
[linux-2.6.git] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *      
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <asm/uaccess.h>
22
23 /* Each xfrm_state may be linked to two tables:
24
25    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
26    2. Hash table by daddr to find what SAs exist for given
27       destination/tunnel endpoint. (output)
28  */
29
30 static DEFINE_SPINLOCK(xfrm_state_lock);
31
32 /* Hash table to find appropriate SA towards given target (endpoint
33  * of tunnel or destination of transport mode) allowed by selector.
34  *
35  * Main use is finding SA after policy selected tunnel or transport mode.
36  * Also, it can be used by ah/esp icmp error handler to find offending SA.
37  */
38 static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
39 static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
40
41 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
42 EXPORT_SYMBOL(km_waitq);
43
44 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
45 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
46
47 static struct work_struct xfrm_state_gc_work;
48 static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
49 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
50
51 static void __xfrm_state_delete(struct xfrm_state *x);
52
53 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
54 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
55
56 static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
57 static void km_state_expired(struct xfrm_state *x, int hard);
58
59 static void xfrm_state_gc_destroy(struct xfrm_state *x)
60 {
61         if (del_timer(&x->timer))
62                 BUG();
63         if (x->aalg)
64                 kfree(x->aalg);
65         if (x->ealg)
66                 kfree(x->ealg);
67         if (x->calg)
68                 kfree(x->calg);
69         if (x->encap)
70                 kfree(x->encap);
71         if (x->type) {
72                 x->type->destructor(x);
73                 xfrm_put_type(x->type);
74         }
75         kfree(x);
76 }
77
78 static void xfrm_state_gc_task(void *data)
79 {
80         struct xfrm_state *x;
81         struct list_head *entry, *tmp;
82         struct list_head gc_list = LIST_HEAD_INIT(gc_list);
83
84         spin_lock_bh(&xfrm_state_gc_lock);
85         list_splice_init(&xfrm_state_gc_list, &gc_list);
86         spin_unlock_bh(&xfrm_state_gc_lock);
87
88         list_for_each_safe(entry, tmp, &gc_list) {
89                 x = list_entry(entry, struct xfrm_state, bydst);
90                 xfrm_state_gc_destroy(x);
91         }
92         wake_up(&km_waitq);
93 }
94
95 static inline unsigned long make_jiffies(long secs)
96 {
97         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
98                 return MAX_SCHEDULE_TIMEOUT-1;
99         else
100                 return secs*HZ;
101 }
102
103 static void xfrm_timer_handler(unsigned long data)
104 {
105         struct xfrm_state *x = (struct xfrm_state*)data;
106         unsigned long now = (unsigned long)xtime.tv_sec;
107         long next = LONG_MAX;
108         int warn = 0;
109
110         spin_lock(&x->lock);
111         if (x->km.state == XFRM_STATE_DEAD)
112                 goto out;
113         if (x->km.state == XFRM_STATE_EXPIRED)
114                 goto expired;
115         if (x->lft.hard_add_expires_seconds) {
116                 long tmo = x->lft.hard_add_expires_seconds +
117                         x->curlft.add_time - now;
118                 if (tmo <= 0)
119                         goto expired;
120                 if (tmo < next)
121                         next = tmo;
122         }
123         if (x->lft.hard_use_expires_seconds) {
124                 long tmo = x->lft.hard_use_expires_seconds +
125                         (x->curlft.use_time ? : now) - now;
126                 if (tmo <= 0)
127                         goto expired;
128                 if (tmo < next)
129                         next = tmo;
130         }
131         if (x->km.dying)
132                 goto resched;
133         if (x->lft.soft_add_expires_seconds) {
134                 long tmo = x->lft.soft_add_expires_seconds +
135                         x->curlft.add_time - now;
136                 if (tmo <= 0)
137                         warn = 1;
138                 else if (tmo < next)
139                         next = tmo;
140         }
141         if (x->lft.soft_use_expires_seconds) {
142                 long tmo = x->lft.soft_use_expires_seconds +
143                         (x->curlft.use_time ? : now) - now;
144                 if (tmo <= 0)
145                         warn = 1;
146                 else if (tmo < next)
147                         next = tmo;
148         }
149
150         if (warn)
151                 km_state_expired(x, 0);
152 resched:
153         if (next != LONG_MAX &&
154             !mod_timer(&x->timer, jiffies + make_jiffies(next)))
155                 xfrm_state_hold(x);
156         goto out;
157
158 expired:
159         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
160                 x->km.state = XFRM_STATE_EXPIRED;
161                 wake_up(&km_waitq);
162                 next = 2;
163                 goto resched;
164         }
165         if (x->id.spi != 0)
166                 km_state_expired(x, 1);
167         __xfrm_state_delete(x);
168
169 out:
170         spin_unlock(&x->lock);
171         xfrm_state_put(x);
172 }
173
174 struct xfrm_state *xfrm_state_alloc(void)
175 {
176         struct xfrm_state *x;
177
178         x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
179
180         if (x) {
181                 memset(x, 0, sizeof(struct xfrm_state));
182                 atomic_set(&x->refcnt, 1);
183                 atomic_set(&x->tunnel_users, 0);
184                 INIT_LIST_HEAD(&x->bydst);
185                 INIT_LIST_HEAD(&x->byspi);
186                 init_timer(&x->timer);
187                 x->timer.function = xfrm_timer_handler;
188                 x->timer.data     = (unsigned long)x;
189                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
190                 x->lft.soft_byte_limit = XFRM_INF;
191                 x->lft.soft_packet_limit = XFRM_INF;
192                 x->lft.hard_byte_limit = XFRM_INF;
193                 x->lft.hard_packet_limit = XFRM_INF;
194                 spin_lock_init(&x->lock);
195         }
196         return x;
197 }
198 EXPORT_SYMBOL(xfrm_state_alloc);
199
200 void __xfrm_state_destroy(struct xfrm_state *x)
201 {
202         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
203
204         spin_lock_bh(&xfrm_state_gc_lock);
205         list_add(&x->bydst, &xfrm_state_gc_list);
206         spin_unlock_bh(&xfrm_state_gc_lock);
207         schedule_work(&xfrm_state_gc_work);
208 }
209 EXPORT_SYMBOL(__xfrm_state_destroy);
210
211 static void __xfrm_state_delete(struct xfrm_state *x)
212 {
213         if (x->km.state != XFRM_STATE_DEAD) {
214                 x->km.state = XFRM_STATE_DEAD;
215                 spin_lock(&xfrm_state_lock);
216                 list_del(&x->bydst);
217                 atomic_dec(&x->refcnt);
218                 if (x->id.spi) {
219                         list_del(&x->byspi);
220                         atomic_dec(&x->refcnt);
221                 }
222                 spin_unlock(&xfrm_state_lock);
223                 if (del_timer(&x->timer))
224                         atomic_dec(&x->refcnt);
225
226                 /* The number two in this test is the reference
227                  * mentioned in the comment below plus the reference
228                  * our caller holds.  A larger value means that
229                  * there are DSTs attached to this xfrm_state.
230                  */
231                 if (atomic_read(&x->refcnt) > 2)
232                         xfrm_flush_bundles();
233
234                 /* All xfrm_state objects are created by xfrm_state_alloc.
235                  * The xfrm_state_alloc call gives a reference, and that
236                  * is what we are dropping here.
237                  */
238                 atomic_dec(&x->refcnt);
239         }
240 }
241
242 void xfrm_state_delete(struct xfrm_state *x)
243 {
244         spin_lock_bh(&x->lock);
245         __xfrm_state_delete(x);
246         spin_unlock_bh(&x->lock);
247 }
248 EXPORT_SYMBOL(xfrm_state_delete);
249
250 void xfrm_state_flush(u8 proto)
251 {
252         int i;
253         struct xfrm_state *x;
254
255         spin_lock_bh(&xfrm_state_lock);
256         for (i = 0; i < XFRM_DST_HSIZE; i++) {
257 restart:
258                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
259                         if (!xfrm_state_kern(x) &&
260                             (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
261                                 xfrm_state_hold(x);
262                                 spin_unlock_bh(&xfrm_state_lock);
263
264                                 xfrm_state_delete(x);
265                                 xfrm_state_put(x);
266
267                                 spin_lock_bh(&xfrm_state_lock);
268                                 goto restart;
269                         }
270                 }
271         }
272         spin_unlock_bh(&xfrm_state_lock);
273         wake_up(&km_waitq);
274 }
275 EXPORT_SYMBOL(xfrm_state_flush);
276
277 static int
278 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
279                   struct xfrm_tmpl *tmpl,
280                   xfrm_address_t *daddr, xfrm_address_t *saddr,
281                   unsigned short family)
282 {
283         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
284         if (!afinfo)
285                 return -1;
286         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
287         xfrm_state_put_afinfo(afinfo);
288         return 0;
289 }
290
291 struct xfrm_state *
292 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
293                 struct flowi *fl, struct xfrm_tmpl *tmpl,
294                 struct xfrm_policy *pol, int *err,
295                 unsigned short family)
296 {
297         unsigned h = xfrm_dst_hash(daddr, family);
298         struct xfrm_state *x;
299         int acquire_in_progress = 0;
300         int error = 0;
301         struct xfrm_state *best = NULL;
302
303         spin_lock_bh(&xfrm_state_lock);
304         list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
305                 if (x->props.family == family &&
306                     x->props.reqid == tmpl->reqid &&
307                     xfrm_state_addr_check(x, daddr, saddr, family) &&
308                     tmpl->mode == x->props.mode &&
309                     tmpl->id.proto == x->id.proto) {
310                         /* Resolution logic:
311                            1. There is a valid state with matching selector.
312                               Done.
313                            2. Valid state with inappropriate selector. Skip.
314
315                            Entering area of "sysdeps".
316
317                            3. If state is not valid, selector is temporary,
318                               it selects only session which triggered
319                               previous resolution. Key manager will do
320                               something to install a state with proper
321                               selector.
322                          */
323                         if (x->km.state == XFRM_STATE_VALID) {
324                                 if (!xfrm_selector_match(&x->sel, fl, family))
325                                         continue;
326                                 if (!best ||
327                                     best->km.dying > x->km.dying ||
328                                     (best->km.dying == x->km.dying &&
329                                      best->curlft.add_time < x->curlft.add_time))
330                                         best = x;
331                         } else if (x->km.state == XFRM_STATE_ACQ) {
332                                 acquire_in_progress = 1;
333                         } else if (x->km.state == XFRM_STATE_ERROR ||
334                                    x->km.state == XFRM_STATE_EXPIRED) {
335                                 if (xfrm_selector_match(&x->sel, fl, family))
336                                         error = 1;
337                         }
338                 }
339         }
340
341         x = best;
342         if (!x && !error && !acquire_in_progress &&
343             ((x = xfrm_state_alloc()) != NULL)) {
344                 /* Initialize temporary selector matching only
345                  * to current session. */
346                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
347
348                 if (km_query(x, tmpl, pol) == 0) {
349                         x->km.state = XFRM_STATE_ACQ;
350                         list_add_tail(&x->bydst, xfrm_state_bydst+h);
351                         xfrm_state_hold(x);
352                         if (x->id.spi) {
353                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
354                                 list_add(&x->byspi, xfrm_state_byspi+h);
355                                 xfrm_state_hold(x);
356                         }
357                         x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
358                         xfrm_state_hold(x);
359                         x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
360                         add_timer(&x->timer);
361                 } else {
362                         x->km.state = XFRM_STATE_DEAD;
363                         xfrm_state_put(x);
364                         x = NULL;
365                         error = 1;
366                 }
367         }
368         if (x)
369                 xfrm_state_hold(x);
370         else
371                 *err = acquire_in_progress ? -EAGAIN :
372                         (error ? -ESRCH : -ENOMEM);
373         spin_unlock_bh(&xfrm_state_lock);
374         return x;
375 }
376
377 static void __xfrm_state_insert(struct xfrm_state *x)
378 {
379         unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
380
381         list_add(&x->bydst, xfrm_state_bydst+h);
382         xfrm_state_hold(x);
383
384         h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
385
386         list_add(&x->byspi, xfrm_state_byspi+h);
387         xfrm_state_hold(x);
388
389         if (!mod_timer(&x->timer, jiffies + HZ))
390                 xfrm_state_hold(x);
391
392         wake_up(&km_waitq);
393 }
394
395 void xfrm_state_insert(struct xfrm_state *x)
396 {
397         spin_lock_bh(&xfrm_state_lock);
398         __xfrm_state_insert(x);
399         spin_unlock_bh(&xfrm_state_lock);
400 }
401 EXPORT_SYMBOL(xfrm_state_insert);
402
403 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
404
405 int xfrm_state_add(struct xfrm_state *x)
406 {
407         struct xfrm_state_afinfo *afinfo;
408         struct xfrm_state *x1;
409         int family;
410         int err;
411
412         family = x->props.family;
413         afinfo = xfrm_state_get_afinfo(family);
414         if (unlikely(afinfo == NULL))
415                 return -EAFNOSUPPORT;
416
417         spin_lock_bh(&xfrm_state_lock);
418
419         x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
420         if (x1) {
421                 xfrm_state_put(x1);
422                 x1 = NULL;
423                 err = -EEXIST;
424                 goto out;
425         }
426
427         if (x->km.seq) {
428                 x1 = __xfrm_find_acq_byseq(x->km.seq);
429                 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
430                         xfrm_state_put(x1);
431                         x1 = NULL;
432                 }
433         }
434
435         if (!x1)
436                 x1 = afinfo->find_acq(
437                         x->props.mode, x->props.reqid, x->id.proto,
438                         &x->id.daddr, &x->props.saddr, 0);
439
440         __xfrm_state_insert(x);
441         err = 0;
442
443 out:
444         spin_unlock_bh(&xfrm_state_lock);
445         xfrm_state_put_afinfo(afinfo);
446
447         if (x1) {
448                 xfrm_state_delete(x1);
449                 xfrm_state_put(x1);
450         }
451
452         return err;
453 }
454 EXPORT_SYMBOL(xfrm_state_add);
455
456 int xfrm_state_update(struct xfrm_state *x)
457 {
458         struct xfrm_state_afinfo *afinfo;
459         struct xfrm_state *x1;
460         int err;
461
462         afinfo = xfrm_state_get_afinfo(x->props.family);
463         if (unlikely(afinfo == NULL))
464                 return -EAFNOSUPPORT;
465
466         spin_lock_bh(&xfrm_state_lock);
467         x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
468
469         err = -ESRCH;
470         if (!x1)
471                 goto out;
472
473         if (xfrm_state_kern(x1)) {
474                 xfrm_state_put(x1);
475                 err = -EEXIST;
476                 goto out;
477         }
478
479         if (x1->km.state == XFRM_STATE_ACQ) {
480                 __xfrm_state_insert(x);
481                 x = NULL;
482         }
483         err = 0;
484
485 out:
486         spin_unlock_bh(&xfrm_state_lock);
487         xfrm_state_put_afinfo(afinfo);
488
489         if (err)
490                 return err;
491
492         if (!x) {
493                 xfrm_state_delete(x1);
494                 xfrm_state_put(x1);
495                 return 0;
496         }
497
498         err = -EINVAL;
499         spin_lock_bh(&x1->lock);
500         if (likely(x1->km.state == XFRM_STATE_VALID)) {
501                 if (x->encap && x1->encap)
502                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
503                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
504                 x1->km.dying = 0;
505
506                 if (!mod_timer(&x1->timer, jiffies + HZ))
507                         xfrm_state_hold(x1);
508                 if (x1->curlft.use_time)
509                         xfrm_state_check_expire(x1);
510
511                 err = 0;
512         }
513         spin_unlock_bh(&x1->lock);
514
515         xfrm_state_put(x1);
516
517         return err;
518 }
519 EXPORT_SYMBOL(xfrm_state_update);
520
521 int xfrm_state_check_expire(struct xfrm_state *x)
522 {
523         if (!x->curlft.use_time)
524                 x->curlft.use_time = (unsigned long)xtime.tv_sec;
525
526         if (x->km.state != XFRM_STATE_VALID)
527                 return -EINVAL;
528
529         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
530             x->curlft.packets >= x->lft.hard_packet_limit) {
531                 km_state_expired(x, 1);
532                 if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ))
533                         xfrm_state_hold(x);
534                 return -EINVAL;
535         }
536
537         if (!x->km.dying &&
538             (x->curlft.bytes >= x->lft.soft_byte_limit ||
539              x->curlft.packets >= x->lft.soft_packet_limit))
540                 km_state_expired(x, 0);
541         return 0;
542 }
543 EXPORT_SYMBOL(xfrm_state_check_expire);
544
545 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
546 {
547         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
548                 - skb_headroom(skb);
549
550         if (nhead > 0)
551                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
552
553         /* Check tail too... */
554         return 0;
555 }
556
/*
 * Check that @x may be applied to @skb: first the expiry limits, then
 * the available headroom.
 *
 * Returns a negative value from xfrm_state_check_expire() if that check
 * fails, otherwise the result of xfrm_state_check_space().
 */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
        int err;

        err = xfrm_state_check_expire(x);
        if (err < 0)
                return err;

        return xfrm_state_check_space(x, skb);
}
EXPORT_SYMBOL(xfrm_state_check);
567
568 struct xfrm_state *
569 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
570                   unsigned short family)
571 {
572         struct xfrm_state *x;
573         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
574         if (!afinfo)
575                 return NULL;
576
577         spin_lock_bh(&xfrm_state_lock);
578         x = afinfo->state_lookup(daddr, spi, proto);
579         spin_unlock_bh(&xfrm_state_lock);
580         xfrm_state_put_afinfo(afinfo);
581         return x;
582 }
583 EXPORT_SYMBOL(xfrm_state_lookup);
584
585 struct xfrm_state *
586 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
587               xfrm_address_t *daddr, xfrm_address_t *saddr, 
588               int create, unsigned short family)
589 {
590         struct xfrm_state *x;
591         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
592         if (!afinfo)
593                 return NULL;
594
595         spin_lock_bh(&xfrm_state_lock);
596         x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
597         spin_unlock_bh(&xfrm_state_lock);
598         xfrm_state_put_afinfo(afinfo);
599         return x;
600 }
601 EXPORT_SYMBOL(xfrm_find_acq);
602
603 /* Silly enough, but I'm lazy to build resolution list */
604
605 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
606 {
607         int i;
608         struct xfrm_state *x;
609
610         for (i = 0; i < XFRM_DST_HSIZE; i++) {
611                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
612                         if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) {
613                                 xfrm_state_hold(x);
614                                 return x;
615                         }
616                 }
617         }
618         return NULL;
619 }
620
621 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
622 {
623         struct xfrm_state *x;
624
625         spin_lock_bh(&xfrm_state_lock);
626         x = __xfrm_find_acq_byseq(seq);
627         spin_unlock_bh(&xfrm_state_lock);
628         return x;
629 }
630 EXPORT_SYMBOL(xfrm_find_acq_byseq);
631
632 u32 xfrm_get_acqseq(void)
633 {
634         u32 res;
635         static u32 acqseq;
636         static DEFINE_SPINLOCK(acqseq_lock);
637
638         spin_lock_bh(&acqseq_lock);
639         res = (++acqseq ? : ++acqseq);
640         spin_unlock_bh(&acqseq_lock);
641         return res;
642 }
643 EXPORT_SYMBOL(xfrm_get_acqseq);
644
645 void
646 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
647 {
648         u32 h;
649         struct xfrm_state *x0;
650
651         if (x->id.spi)
652                 return;
653
654         if (minspi == maxspi) {
655                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
656                 if (x0) {
657                         xfrm_state_put(x0);
658                         return;
659                 }
660                 x->id.spi = minspi;
661         } else {
662                 u32 spi = 0;
663                 minspi = ntohl(minspi);
664                 maxspi = ntohl(maxspi);
665                 for (h=0; h<maxspi-minspi+1; h++) {
666                         spi = minspi + net_random()%(maxspi-minspi+1);
667                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
668                         if (x0 == NULL) {
669                                 x->id.spi = htonl(spi);
670                                 break;
671                         }
672                         xfrm_state_put(x0);
673                 }
674         }
675         if (x->id.spi) {
676                 spin_lock_bh(&xfrm_state_lock);
677                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
678                 list_add(&x->byspi, xfrm_state_byspi+h);
679                 xfrm_state_hold(x);
680                 spin_unlock_bh(&xfrm_state_lock);
681                 wake_up(&km_waitq);
682         }
683 }
684 EXPORT_SYMBOL(xfrm_alloc_spi);
685
686 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
687                     void *data)
688 {
689         int i;
690         struct xfrm_state *x;
691         int count = 0;
692         int err = 0;
693
694         spin_lock_bh(&xfrm_state_lock);
695         for (i = 0; i < XFRM_DST_HSIZE; i++) {
696                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
697                         if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
698                                 count++;
699                 }
700         }
701         if (count == 0) {
702                 err = -ENOENT;
703                 goto out;
704         }
705
706         for (i = 0; i < XFRM_DST_HSIZE; i++) {
707                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
708                         if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
709                                 continue;
710                         err = func(x, --count, data);
711                         if (err)
712                                 goto out;
713                 }
714         }
715 out:
716         spin_unlock_bh(&xfrm_state_lock);
717         return err;
718 }
719 EXPORT_SYMBOL(xfrm_state_walk);
720
721 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
722 {
723         u32 diff;
724
725         seq = ntohl(seq);
726
727         if (unlikely(seq == 0))
728                 return -EINVAL;
729
730         if (likely(seq > x->replay.seq))
731                 return 0;
732
733         diff = x->replay.seq - seq;
734         if (diff >= x->props.replay_window) {
735                 x->stats.replay_window++;
736                 return -EINVAL;
737         }
738
739         if (x->replay.bitmap & (1U << diff)) {
740                 x->stats.replay++;
741                 return -EINVAL;
742         }
743         return 0;
744 }
745 EXPORT_SYMBOL(xfrm_replay_check);
746
747 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
748 {
749         u32 diff;
750
751         seq = ntohl(seq);
752
753         if (seq > x->replay.seq) {
754                 diff = seq - x->replay.seq;
755                 if (diff < x->props.replay_window)
756                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
757                 else
758                         x->replay.bitmap = 1;
759                 x->replay.seq = seq;
760         } else {
761                 diff = x->replay.seq - seq;
762                 x->replay.bitmap |= (1U << diff);
763         }
764 }
765 EXPORT_SYMBOL(xfrm_replay_advance);
766
767 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
768 static DEFINE_RWLOCK(xfrm_km_lock);
769
770 static void km_state_expired(struct xfrm_state *x, int hard)
771 {
772         struct xfrm_mgr *km;
773
774         if (hard)
775                 x->km.state = XFRM_STATE_EXPIRED;
776         else
777                 x->km.dying = 1;
778
779         read_lock(&xfrm_km_lock);
780         list_for_each_entry(km, &xfrm_km_list, list)
781                 km->notify(x, hard);
782         read_unlock(&xfrm_km_lock);
783
784         if (hard)
785                 wake_up(&km_waitq);
786 }
787
788 static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
789 {
790         int err = -EINVAL;
791         struct xfrm_mgr *km;
792
793         read_lock(&xfrm_km_lock);
794         list_for_each_entry(km, &xfrm_km_list, list) {
795                 err = km->acquire(x, t, pol, XFRM_POLICY_OUT);
796                 if (!err)
797                         break;
798         }
799         read_unlock(&xfrm_km_lock);
800         return err;
801 }
802
803 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
804 {
805         int err = -EINVAL;
806         struct xfrm_mgr *km;
807
808         read_lock(&xfrm_km_lock);
809         list_for_each_entry(km, &xfrm_km_list, list) {
810                 if (km->new_mapping)
811                         err = km->new_mapping(x, ipaddr, sport);
812                 if (!err)
813                         break;
814         }
815         read_unlock(&xfrm_km_lock);
816         return err;
817 }
818 EXPORT_SYMBOL(km_new_mapping);
819
820 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
821 {
822         struct xfrm_mgr *km;
823
824         read_lock(&xfrm_km_lock);
825         list_for_each_entry(km, &xfrm_km_list, list)
826                 if (km->notify_policy)
827                         km->notify_policy(pol, dir, hard);
828         read_unlock(&xfrm_km_lock);
829
830         if (hard)
831                 wake_up(&km_waitq);
832 }
833
834 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
835 {
836         int err;
837         u8 *data;
838         struct xfrm_mgr *km;
839         struct xfrm_policy *pol = NULL;
840
841         if (optlen <= 0 || optlen > PAGE_SIZE)
842                 return -EMSGSIZE;
843
844         data = kmalloc(optlen, GFP_KERNEL);
845         if (!data)
846                 return -ENOMEM;
847
848         err = -EFAULT;
849         if (copy_from_user(data, optval, optlen))
850                 goto out;
851
852         err = -EINVAL;
853         read_lock(&xfrm_km_lock);
854         list_for_each_entry(km, &xfrm_km_list, list) {
855                 pol = km->compile_policy(sk->sk_family, optname, data,
856                                          optlen, &err);
857                 if (err >= 0)
858                         break;
859         }
860         read_unlock(&xfrm_km_lock);
861
862         if (err >= 0) {
863                 xfrm_sk_policy_insert(sk, err, pol);
864                 xfrm_pol_put(pol);
865                 err = 0;
866         }
867
868 out:
869         kfree(data);
870         return err;
871 }
872 EXPORT_SYMBOL(xfrm_user_policy);
873
874 int xfrm_register_km(struct xfrm_mgr *km)
875 {
876         write_lock_bh(&xfrm_km_lock);
877         list_add_tail(&km->list, &xfrm_km_list);
878         write_unlock_bh(&xfrm_km_lock);
879         return 0;
880 }
881 EXPORT_SYMBOL(xfrm_register_km);
882
883 int xfrm_unregister_km(struct xfrm_mgr *km)
884 {
885         write_lock_bh(&xfrm_km_lock);
886         list_del(&km->list);
887         write_unlock_bh(&xfrm_km_lock);
888         return 0;
889 }
890 EXPORT_SYMBOL(xfrm_unregister_km);
891
892 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
893 {
894         int err = 0;
895         if (unlikely(afinfo == NULL))
896                 return -EINVAL;
897         if (unlikely(afinfo->family >= NPROTO))
898                 return -EAFNOSUPPORT;
899         write_lock(&xfrm_state_afinfo_lock);
900         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
901                 err = -ENOBUFS;
902         else {
903                 afinfo->state_bydst = xfrm_state_bydst;
904                 afinfo->state_byspi = xfrm_state_byspi;
905                 xfrm_state_afinfo[afinfo->family] = afinfo;
906         }
907         write_unlock(&xfrm_state_afinfo_lock);
908         return err;
909 }
910 EXPORT_SYMBOL(xfrm_state_register_afinfo);
911
912 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
913 {
914         int err = 0;
915         if (unlikely(afinfo == NULL))
916                 return -EINVAL;
917         if (unlikely(afinfo->family >= NPROTO))
918                 return -EAFNOSUPPORT;
919         write_lock(&xfrm_state_afinfo_lock);
920         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
921                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
922                         err = -EINVAL;
923                 else {
924                         xfrm_state_afinfo[afinfo->family] = NULL;
925                         afinfo->state_byspi = NULL;
926                         afinfo->state_bydst = NULL;
927                 }
928         }
929         write_unlock(&xfrm_state_afinfo_lock);
930         return err;
931 }
932 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
933
934 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
935 {
936         struct xfrm_state_afinfo *afinfo;
937         if (unlikely(family >= NPROTO))
938                 return NULL;
939         read_lock(&xfrm_state_afinfo_lock);
940         afinfo = xfrm_state_afinfo[family];
941         if (likely(afinfo != NULL))
942                 read_lock(&afinfo->lock);
943         read_unlock(&xfrm_state_afinfo_lock);
944         return afinfo;
945 }
946
947 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
948 {
949         if (unlikely(afinfo == NULL))
950                 return;
951         read_unlock(&afinfo->lock);
952 }
953
954 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
955 void xfrm_state_delete_tunnel(struct xfrm_state *x)
956 {
957         if (x->tunnel) {
958                 struct xfrm_state *t = x->tunnel;
959
960                 if (atomic_read(&t->tunnel_users) == 2)
961                         xfrm_state_delete(t);
962                 atomic_dec(&t->tunnel_users);
963                 xfrm_state_put(t);
964                 x->tunnel = NULL;
965         }
966 }
967 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
968
969 void __init xfrm_state_init(void)
970 {
971         int i;
972
973         for (i=0; i<XFRM_DST_HSIZE; i++) {
974                 INIT_LIST_HEAD(&xfrm_state_bydst[i]);
975                 INIT_LIST_HEAD(&xfrm_state_byspi[i]);
976         }
977         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
978 }
979