 *	Kazunori MIYAZAWA @USAGI
 *	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *	YOSHIFUJI Hideaki @USAGI
 *		Split up af-specific functions
 *	Derek Atkins <derek@ihtfp.com>
 *		Add UDP Encapsulation
 *
 */
#include <linux/workqueue.h>
#include <net/xfrm.h>
#include <linux/pfkeyv2.h>
#include <linux/ipsec.h>
#include <asm/uaccess.h>
/* Each xfrm_state may be linked to two tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by daddr to find what SAs exist for given
      destination/tunnel endpoint. (output)
 */
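/* Rough usage sketch (not part of the original comment): inbound packets
 * resolve their SA through the byspi table, e.g. something like
 *	x = xfrm_state_lookup(daddr, esph->spi, IPPROTO_ESP, AF_INET);
 * while outbound resolution walks the bydst chain for the chosen
 * destination/tunnel endpoint, as xfrm_state_find() does further below.
 */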
static spinlock_t xfrm_state_lock = SPIN_LOCK_UNLOCKED;

/* Hash table to find appropriate SA towards given target (endpoint
 * of tunnel or destination of transport mode) allowed by selector.
 *
 * Main use is finding SA after policy selected tunnel or transport mode.
 * Also, it can be used by ah/esp icmp error handler to find offending SA.
 */
static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];

DECLARE_WAIT_QUEUE_HEAD(km_waitq);

static rwlock_t xfrm_state_afinfo_lock = RW_LOCK_UNLOCKED;
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];

static struct work_struct xfrm_state_gc_work;
static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
static spinlock_t xfrm_state_gc_lock = SPIN_LOCK_UNLOCKED;

static void __xfrm_state_delete(struct xfrm_state *x);
static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
	if (del_timer(&x->timer))
		BUG();

	x->type->destructor(x);
	xfrm_put_type(x->type);
static void xfrm_state_gc_task(void *data)
{
	struct xfrm_state *x;
	struct list_head *entry, *tmp;
	struct list_head gc_list = LIST_HEAD_INIT(gc_list);

	spin_lock_bh(&xfrm_state_gc_lock);
	list_splice_init(&xfrm_state_gc_list, &gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);

	list_for_each_safe(entry, tmp, &gc_list) {
		x = list_entry(entry, struct xfrm_state, bydst);
		xfrm_state_gc_destroy(x);
	}
}
static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return MAX_SCHEDULE_TIMEOUT-1;
	else
		return secs*HZ;
}
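/* The helper above converts a lifetime given in seconds into a relative
 * timer delay in jiffies, clamping at MAX_SCHEDULE_TIMEOUT-1 so that
 * secs*HZ cannot overflow for very large lifetimes; for example, with
 * HZ == 1000 a 10 second timeout becomes 10000 jiffies.
 */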
static void xfrm_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state*)data;
	unsigned long now = (unsigned long)xtime.tv_sec;
	long next = LONG_MAX;

	spin_lock(&x->lock);
	if (x->km.state == XFRM_STATE_DEAD)
		goto out;
	if (x->km.state == XFRM_STATE_EXPIRED)
		goto expired;

	if (x->lft.hard_add_expires_seconds) {
		long tmo = x->lft.hard_add_expires_seconds +
			x->curlft.add_time - now;

	if (x->lft.hard_use_expires_seconds) {
		long tmo = x->lft.hard_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;

	if (x->lft.soft_add_expires_seconds) {
		long tmo = x->lft.soft_add_expires_seconds +
			x->curlft.add_time - now;

	if (x->lft.soft_use_expires_seconds) {
		long tmo = x->lft.soft_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;

	km_state_expired(x, 0);

	if (next != LONG_MAX &&
	    !mod_timer(&x->timer, jiffies + make_jiffies(next)))

	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
		x->km.state = XFRM_STATE_EXPIRED;

	km_state_expired(x, 1);
	__xfrm_state_delete(x);

	spin_unlock(&x->lock);
struct xfrm_state *xfrm_state_alloc(void)
{
	struct xfrm_state *x;

	x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);

	if (x) {
		memset(x, 0, sizeof(struct xfrm_state));
		atomic_set(&x->refcnt, 1);
		atomic_set(&x->tunnel_users, 0);
		INIT_LIST_HEAD(&x->bydst);
		INIT_LIST_HEAD(&x->byspi);
		init_timer(&x->timer);
		x->timer.function = xfrm_timer_handler;
		x->timer.data = (unsigned long)x;
		x->curlft.add_time = (unsigned long)xtime.tv_sec;
		x->lft.soft_byte_limit = XFRM_INF;
		x->lft.soft_packet_limit = XFRM_INF;
		x->lft.hard_byte_limit = XFRM_INF;
		x->lft.hard_packet_limit = XFRM_INF;
		x->lock = SPIN_LOCK_UNLOCKED;
	}
	return x;
}
void __xfrm_state_destroy(struct xfrm_state *x)
{
	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);

	spin_lock_bh(&xfrm_state_gc_lock);
	list_add(&x->bydst, &xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);
	schedule_work(&xfrm_state_gc_work);
}
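/* Destruction is deferred on purpose: __xfrm_state_destroy() can be reached
 * from the packet path in softirq context, so the state is only queued on
 * xfrm_state_gc_list here and the actual freeing happens later in
 * xfrm_state_gc_task(), running from the workqueue in process context.
 */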
static void __xfrm_state_delete(struct xfrm_state *x)
{
	if (x->km.state != XFRM_STATE_DEAD) {
		x->km.state = XFRM_STATE_DEAD;
		spin_lock(&xfrm_state_lock);
		list_del(&x->bydst);
		atomic_dec(&x->refcnt);
		if (x->id.spi) {
			list_del(&x->byspi);
			atomic_dec(&x->refcnt);
		}
		spin_unlock(&xfrm_state_lock);
		if (del_timer(&x->timer))
			atomic_dec(&x->refcnt);

		/* The number two in this test is the reference
		 * mentioned in the comment below plus the reference
		 * our caller holds.  A larger value means that
		 * there are DSTs attached to this xfrm_state.
		 */
		if (atomic_read(&x->refcnt) > 2)
			xfrm_flush_bundles();

		/* All xfrm_state objects are created by one of two possible
		 * paths:
		 *
		 * 2) xfrm_state_lookup --> xfrm_state_insert
		 *
		 * The xfrm_state_lookup or xfrm_state_alloc call gives a
		 * reference, and that is what we are dropping here.
		 */
		atomic_dec(&x->refcnt);
	}
}
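/* For orientation: the decrements in __xfrm_state_delete() undo the
 * references taken when the state was linked into the bydst and byspi
 * tables and when its timer was armed (each of those paths does an
 * xfrm_state_hold()); what survives the function is the caller's own
 * reference plus whatever dst bundles still point at the state.
 */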
void xfrm_state_delete(struct xfrm_state *x)
{
	xfrm_state_delete_tunnel(x);
	spin_lock_bh(&x->lock);
	__xfrm_state_delete(x);
	spin_unlock_bh(&x->lock);
}
void xfrm_state_flush(u8 proto)
{
	int i;
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i < XFRM_DST_HSIZE; i++) {
restart:
		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
			if (!xfrm_state_kern(x) &&
			    (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
				xfrm_state_hold(x);
				spin_unlock_bh(&xfrm_state_lock);

				xfrm_state_delete(x);
				xfrm_state_put(x);

				spin_lock_bh(&xfrm_state_lock);
				goto restart;
			}
		}
	}
	spin_unlock_bh(&xfrm_state_lock);
}
static void
xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
		  struct xfrm_tmpl *tmpl,
		  xfrm_address_t *daddr, xfrm_address_t *saddr,
		  unsigned short family)
{
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);

	if (!afinfo)
		return;
	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
	xfrm_state_put_afinfo(afinfo);
}
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
		struct flowi *fl, struct xfrm_tmpl *tmpl,
		struct xfrm_policy *pol, int *err,
		unsigned short family)
{
	unsigned h = xfrm_dst_hash(daddr, family);
	struct xfrm_state *x;
	int acquire_in_progress = 0;
	int error = 0;
	struct xfrm_state *best = NULL;

	spin_lock_bh(&xfrm_state_lock);
	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
		if (x->props.family == family &&
		    x->props.reqid == tmpl->reqid &&
		    xfrm_state_addr_check(x, daddr, saddr, family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto) {
			/*
			   1. There is a valid state with matching selector.
			   2. Valid state with inappropriate selector. Skip.

			   Entering area of "sysdeps".

			   3. If state is not valid, selector is temporary,
			      it selects only session which triggered
			      previous resolution. Key manager will do
			      something to install a state with proper
			      selector.
			 */
			if (x->km.state == XFRM_STATE_VALID) {
				if (!xfrm_selector_match(&x->sel, fl, family))
					continue;
				if (!best ||
				    best->km.dying > x->km.dying ||
				    (best->km.dying == x->km.dying &&
				     best->curlft.add_time < x->curlft.add_time))
					best = x;
			} else if (x->km.state == XFRM_STATE_ACQ) {
				acquire_in_progress = 1;
			} else if (x->km.state == XFRM_STATE_ERROR ||
				   x->km.state == XFRM_STATE_EXPIRED) {
				if (xfrm_selector_match(&x->sel, fl, family))
					error = 1;
			}
		}
	}

	if (best) {
		xfrm_state_hold(best);
		spin_unlock_bh(&xfrm_state_lock);
		return best;
	}

	x = NULL;
	if (!error && !acquire_in_progress &&
	    ((x = xfrm_state_alloc()) != NULL)) {
		/* Initialize temporary selector matching only
		 * to current session. */
		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);

		if (km_query(x, tmpl, pol) == 0) {
			x->km.state = XFRM_STATE_ACQ;
			list_add_tail(&x->bydst, xfrm_state_bydst+h);
			xfrm_state_hold(x);
			if (x->id.spi) {
				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
				list_add(&x->byspi, xfrm_state_byspi+h);
				xfrm_state_hold(x);
			}
			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
			xfrm_state_hold(x);
			mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ);
		} else {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			error = 1;
		}
	}
	spin_unlock_bh(&xfrm_state_lock);
	if (!x)
		*err = acquire_in_progress ? -EAGAIN :
			(error ? -ESRCH : -ENOMEM);
	return x;
}
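/* Summary of the lookup above, as the code reads: a VALID state whose
 * selector matches the flow wins, preferring one that is not dying and,
 * among equals, the most recently added; if nothing usable exists and no
 * acquire is already pending, a temporary XFRM_STATE_ACQ entry is created
 * and the key manager is asked via km_query() to negotiate a real SA, with
 * the caller seeing -EAGAIN, -ESRCH or -ENOMEM depending on why resolution
 * failed.
 */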
static void __xfrm_state_insert(struct xfrm_state *x)
{
	unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);

	list_add(&x->bydst, xfrm_state_bydst+h);
	xfrm_state_hold(x);

	h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
	list_add(&x->byspi, xfrm_state_byspi+h);
	xfrm_state_hold(x);

	if (!mod_timer(&x->timer, jiffies + HZ))
		xfrm_state_hold(x);
}
void xfrm_state_insert(struct xfrm_state *x)
{
	spin_lock_bh(&xfrm_state_lock);
	__xfrm_state_insert(x);
	spin_unlock_bh(&xfrm_state_lock);
}
int xfrm_state_add(struct xfrm_state *x)
{
	struct xfrm_state_afinfo *afinfo;
	struct xfrm_state *x1;

	afinfo = xfrm_state_get_afinfo(x->props.family);
	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_state_lock);

	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
	if (!x1) {
		x1 = afinfo->find_acq(
			x->props.mode, x->props.reqid, x->id.proto,
			&x->id.daddr, &x->props.saddr, 0);
		if (x1 && x1->id.spi != x->id.spi && x1->id.spi) {

	if (x1 && x1->id.spi) {

	__xfrm_state_insert(x);

	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);

	if (x1)
		xfrm_state_delete(x1);
int xfrm_state_update(struct xfrm_state *x)
{
	struct xfrm_state_afinfo *afinfo;
	struct xfrm_state *x1;

	afinfo = xfrm_state_get_afinfo(x->props.family);
	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_state_lock);
	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);

	if (xfrm_state_kern(x1)) {

	if (x1->km.state == XFRM_STATE_ACQ) {
		__xfrm_state_insert(x);

	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);

	xfrm_state_delete(x1);

	spin_lock_bh(&x1->lock);
	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		if (x->encap && x1->encap)
			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
	}
	spin_unlock_bh(&x1->lock);

	if (!mod_timer(&x1->timer, jiffies + HZ))
		xfrm_state_hold(x1);
	if (x1->curlft.use_time)
		xfrm_state_check_expire(x1);
int xfrm_state_check_expire(struct xfrm_state *x)
{
	if (!x->curlft.use_time)
		x->curlft.use_time = (unsigned long)xtime.tv_sec;

	if (x->km.state != XFRM_STATE_VALID)
		return -EINVAL;

	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
	    x->curlft.packets >= x->lft.hard_packet_limit) {
		km_state_expired(x, 1);
		if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ))
			xfrm_state_hold(x);
		return -EINVAL;
	}

	if (!x->km.dying &&
	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
	     x->curlft.packets >= x->lft.soft_packet_limit))
		km_state_expired(x, 0);
	return 0;
}
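/* As the function above reads: once the hard byte or packet limit is hit
 * the key manager is told with hard=1 and the timer is re-armed, after
 * which the state is expected to be torn down; crossing only the soft
 * limit sends a hard=0 notification so the key manager can negotiate a
 * replacement SA before traffic is actually interrupted.
 */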
int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
{
	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
		- skb_headroom(skb);

	if (nhead > 0)
		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);

	/* Check tail too... */
	return 0;
}
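/* In other words: before a transform prepends its header the skb needs at
 * least props.header_len bytes of headroom on top of the device's
 * link-layer reserve, and pskb_expand_head() reallocates the head if that
 * is missing.  The "check tail too" note presumably refers to trailer
 * space (e.g. ESP padding), which is not verified here.
 */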
struct xfrm_state *
xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
		  unsigned short family)
{
	struct xfrm_state *x;
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);

	if (!afinfo)
		return NULL;

	spin_lock_bh(&xfrm_state_lock);
	x = afinfo->state_lookup(daddr, spi, proto);
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);
	return x;
}
struct xfrm_state *
xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
	      xfrm_address_t *daddr, xfrm_address_t *saddr,
	      int create, unsigned short family)
{
	struct xfrm_state *x;
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);

	if (!afinfo)
		return NULL;

	spin_lock_bh(&xfrm_state_lock);
	x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);
	return x;
}
/* Silly enough, but I'm lazy to build resolution list */

struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
{
	int i;
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i < XFRM_DST_HSIZE; i++) {
		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
			if (x->km.seq == seq) {
				xfrm_state_hold(x);
				spin_unlock_bh(&xfrm_state_lock);
				return x;
			}
		}
	}
	spin_unlock_bh(&xfrm_state_lock);
	return NULL;
}
u32 xfrm_get_acqseq(void)
{
	u32 res;
	static u32 acqseq;
	static spinlock_t acqseq_lock = SPIN_LOCK_UNLOCKED;

	spin_lock_bh(&acqseq_lock);
	res = (++acqseq ? : ++acqseq);
	spin_unlock_bh(&acqseq_lock);
	return res;
}
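/* The "(++acqseq ? : ++acqseq)" expression uses the GCC "a ?: b" extension:
 * it yields ++acqseq unless that wrapped around to 0, in which case the
 * counter is bumped once more.  The effect is that 0 is never handed out,
 * so a zero km.seq can keep meaning "no pending acquire".
 */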
void
xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
{
	u32 h;
	struct xfrm_state *x0;

	if (minspi == maxspi) {
		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
		if (x0) {
			xfrm_state_put(x0);
			return;
		}
		x->id.spi = minspi;
	} else {
		u32 spi = 0;
		minspi = ntohl(minspi);
		maxspi = ntohl(maxspi);
		for (h=0; h<maxspi-minspi+1; h++) {
			spi = minspi + net_random()%(maxspi-minspi+1);
			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
			if (x0 == NULL)
				break;
			xfrm_state_put(x0);
		}
		x->id.spi = htonl(spi);
	}
	if (x->id.spi) {
		spin_lock_bh(&xfrm_state_lock);
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
		list_add(&x->byspi, xfrm_state_byspi+h);
		xfrm_state_hold(x);
		spin_unlock_bh(&xfrm_state_lock);
	}
}
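/* Allocation strategy, as far as the excerpt shows: with minspi == maxspi
 * the single requested value is taken only if no existing state owns it;
 * otherwise up to maxspi-minspi+1 random probes are made within the range
 * (compared in host byte order, stored back in network order) until
 * xfrm_state_lookup() reports a free SPI, and the state is then hashed
 * into xfrm_state_byspi under xfrm_state_lock.
 */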
int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
		    void *data)
{
	int i;
	struct xfrm_state *x;
	int count = 0;
	int err = 0;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i < XFRM_DST_HSIZE; i++) {
		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
			if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
				count++;
		}
	}

	for (i = 0; i < XFRM_DST_HSIZE; i++) {
		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
			if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
				continue;
			err = func(x, --count, data);
		}
	}

	spin_unlock_bh(&xfrm_state_lock);
	return err;
}
int xfrm_replay_check(struct xfrm_state *x, u32 seq)
{
	u32 diff;
	seq = ntohl(seq);
	if (unlikely(seq == 0))
		return -EINVAL;
	if (likely(seq > x->replay.seq))
		return 0;
	diff = x->replay.seq - seq;
	if (diff >= x->props.replay_window) {
		x->stats.replay_window++;
		return -EINVAL;
	}
	if (x->replay.bitmap & (1U << diff)) {
void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
{
	u32 diff;
	seq = ntohl(seq);

	if (seq > x->replay.seq) {
		diff = seq - x->replay.seq;
		if (diff < x->props.replay_window)
			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
		else
			x->replay.bitmap = 1;
		x->replay.seq = seq;
	} else {
		diff = x->replay.seq - seq;
		x->replay.bitmap |= (1U << diff);
	}
}
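/* Worked example (hypothetical values): with replay.seq == 100, bit 0 of
 * replay.bitmap stands for seq 100, bit 1 for 99, and so on.  Receiving
 * seq 103 shifts the bitmap left by diff == 3 and sets bit 0; a later
 * old-but-in-window seq 101 only sets bit diff == 2.  xfrm_replay_check()
 * rejects anything older than the window or whose bit is already set.
 */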
int xfrm_check_selectors(struct xfrm_state **x, int n, struct flowi *fl)
{
	int i;
	for (i=0; i<n; i++) {
		int match;
		match = xfrm_selector_match(&x[i]->sel, fl, x[i]->props.family);
static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
static rwlock_t xfrm_km_lock = RW_LOCK_UNLOCKED;
void km_state_expired(struct xfrm_state *x, int hard)
{
	struct xfrm_mgr *km;

	if (hard)
		x->km.state = XFRM_STATE_EXPIRED;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list)
		km->notify(x, hard);
	read_unlock(&xfrm_km_lock);
int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
{
	int err = -EINVAL;
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		err = km->acquire(x, t, pol, XFRM_POLICY_OUT);
		if (!err)
			break;
	}
	read_unlock(&xfrm_km_lock);
int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
{
	int err = -EINVAL;
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		if (km->new_mapping)
			err = km->new_mapping(x, ipaddr, sport);
		if (!err)
			break;
	}
	read_unlock(&xfrm_km_lock);
void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
{
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list)
		if (km->notify_policy)
			km->notify_policy(pol, dir, hard);
	read_unlock(&xfrm_km_lock);
int xfrm_user_policy(struct sock *sk, int optname, u8 *optval, int optlen)
{
	int err;
	u8 *data;
	struct xfrm_mgr *km;
	struct xfrm_policy *pol = NULL;

	if (optlen <= 0 || optlen > PAGE_SIZE)
		return -EMSGSIZE;

	data = kmalloc(optlen, GFP_KERNEL);

	if (copy_from_user(data, optval, optlen))
		goto out;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		pol = km->compile_policy(sk->sk_family, optname, data,
					 optlen, &err);
	}
	read_unlock(&xfrm_km_lock);

	xfrm_sk_policy_insert(sk, err, pol);
int xfrm_register_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_add_tail(&km->list, &xfrm_km_list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
int xfrm_unregister_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_del(&km->list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock(&xfrm_state_afinfo_lock);
	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
		err = -ENOBUFS;
	else {
		afinfo->state_bydst = xfrm_state_bydst;
		afinfo->state_byspi = xfrm_state_byspi;
		xfrm_state_afinfo[afinfo->family] = afinfo;
	}
	write_unlock(&xfrm_state_afinfo_lock);
	return err;
}
int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock(&xfrm_state_afinfo_lock);
	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else {
			xfrm_state_afinfo[afinfo->family] = NULL;
			afinfo->state_byspi = NULL;
			afinfo->state_bydst = NULL;
		}
	}
	write_unlock(&xfrm_state_afinfo_lock);
	return err;
}
struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
{
	struct xfrm_state_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_state_afinfo_lock);
	afinfo = xfrm_state_afinfo[family];
	if (likely(afinfo != NULL))
		read_lock(&afinfo->lock);
	read_unlock(&xfrm_state_afinfo_lock);
	return afinfo;
}
void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
	if (unlikely(afinfo == NULL))
		return;
	read_unlock(&afinfo->lock);
}
/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
	if (x->tunnel) {
		struct xfrm_state *t = x->tunnel;

		if (atomic_read(&t->tunnel_users) == 2)
			xfrm_state_delete(t);
		atomic_dec(&t->tunnel_users);
		xfrm_state_put(t);
		x->tunnel = NULL;
	}
}
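/* As the check above reads: only when tunnel_users has dropped to its last
 * user (the value 2 presumably accounting for the creator's own reference)
 * is the tunnel SA itself deleted; in every case the caller's share of
 * tunnel_users is then released and the back-pointer cleared.
 */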
void __init xfrm_state_init(void)
{
	int i;

	for (i=0; i<XFRM_DST_HSIZE; i++) {
		INIT_LIST_HEAD(&xfrm_state_bydst[i]);
		INIT_LIST_HEAD(&xfrm_state_byspi[i]);
	}
	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
}