6 * Kazunori MIYAZAWA @USAGI
7 * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
9 * YOSHIFUJI Hideaki @USAGI
10 * Split up af-specific functions
11 * Derek Atkins <derek@ihtfp.com>
12 * Add UDP Encapsulation
16 #include <linux/workqueue.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <asm/uaccess.h>
22 /* Each xfrm_state may be linked to two tables:
24 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
25 2. Hash table by daddr to find what SAs exist for given
26 destination/tunnel endpoint. (output)
29 static spinlock_t xfrm_state_lock = SPIN_LOCK_UNLOCKED;
31 /* Hash table to find appropriate SA towards given target (endpoint
32 * of tunnel or destination of transport mode) allowed by selector.
34 * Main use is finding SA after policy selected tunnel or transport mode.
35 * Also, it can be used by ah/esp icmp error handler to find offending SA.
37 static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
38 static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
40 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
42 static rwlock_t xfrm_state_afinfo_lock = RW_LOCK_UNLOCKED;
43 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
45 static struct work_struct xfrm_state_gc_work;
46 static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
47 static spinlock_t xfrm_state_gc_lock = SPIN_LOCK_UNLOCKED;
49 static void __xfrm_state_delete(struct xfrm_state *x);
51 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
52 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
/* Final teardown of a dead state from the GC worker: cancel any pending
 * timer, then let the transform type (ah/esp/...) free its per-state data
 * and drop the type module reference.
 * NOTE(review): this listing is truncated — lines between the numbered
 * fragments are missing from this view. */
54 static void xfrm_state_gc_destroy(struct xfrm_state *x)
56 	if (del_timer(&x->timer))
67 		x->type->destructor(x);
68 		xfrm_put_type(x->type);
/* Deferred garbage collection.  Steal the whole global GC list under
 * xfrm_state_gc_lock into a private list, then destroy each state
 * outside the lock.  (Listing truncated; some lines not visible.) */
73 static void xfrm_state_gc_task(void *data)
76 	struct list_head *entry, *tmp;
77 	struct list_head gc_list = LIST_HEAD_INIT(gc_list);
79 	spin_lock_bh(&xfrm_state_gc_lock);
80 	list_splice_init(&xfrm_state_gc_list, &gc_list);
81 	spin_unlock_bh(&xfrm_state_gc_lock);
83 	list_for_each_safe(entry, tmp, &gc_list) {
		/* states were queued via their bydst link in __xfrm_state_destroy */
84 		x = list_entry(entry, struct xfrm_state, bydst);
85 		xfrm_state_gc_destroy(x);
/* Convert a timeout in seconds to jiffies, clamped so it never reaches
 * MAX_SCHEDULE_TIMEOUT.  (Truncated view: the non-clamped branch is not
 * visible here.) */
90 static inline unsigned long make_jiffies(long secs)
92 	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
93 		return MAX_SCHEDULE_TIMEOUT-1;
/* Per-state lifetime timer.  Walks the four lifetime limits (hard/soft,
 * add/use), computes the nearest future expiry in 'next', notifies key
 * managers via km_state_expired(), and either re-arms the timer or
 * deletes the state once hard-expired.
 * NOTE(review): listing truncated — several lines (including the lock
 * acquisition matching the unlock below) are not visible. */
98 static void xfrm_timer_handler(unsigned long data)
100 	struct xfrm_state *x = (struct xfrm_state*)data;
	/* wall-clock seconds; lifetimes are tracked in seconds, not jiffies */
101 	unsigned long now = (unsigned long)xtime.tv_sec;
102 	long next = LONG_MAX;
106 	if (x->km.state == XFRM_STATE_DEAD)
108 	if (x->km.state == XFRM_STATE_EXPIRED)
110 	if (x->lft.hard_add_expires_seconds) {
111 		long tmo = x->lft.hard_add_expires_seconds +
112 			x->curlft.add_time - now;
118 	if (x->lft.hard_use_expires_seconds) {
119 		long tmo = x->lft.hard_use_expires_seconds +
			/* use_time of 0 means "never used yet": count from now */
120 			(x->curlft.use_time ? : now) - now;
128 	if (x->lft.soft_add_expires_seconds) {
129 		long tmo = x->lft.soft_add_expires_seconds +
130 			x->curlft.add_time - now;
136 	if (x->lft.soft_use_expires_seconds) {
137 		long tmo = x->lft.soft_use_expires_seconds +
138 			(x->curlft.use_time ? : now) - now;
		/* soft expiry: warn key manager (hard=0), state stays usable */
146 		km_state_expired(x, 0);
148 	if (next != LONG_MAX &&
149 	    !mod_timer(&x->timer, jiffies + make_jiffies(next)))
	/* larval (ACQ) state that never got an SPI simply expires */
154 	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
155 		x->km.state = XFRM_STATE_EXPIRED;
	/* hard expiry: notify (hard=1) and delete */
161 	km_state_expired(x, 1);
162 	__xfrm_state_delete(x);
165 	spin_unlock(&x->lock);
/* Allocate and zero-initialize a new xfrm_state: one reference for the
 * caller, empty hash links, lifetime timer wired to xfrm_timer_handler,
 * and byte/packet limits defaulted to "infinite" (XFRM_INF).
 * (Truncated view: the NULL-check/return lines are not visible.) */
169 struct xfrm_state *xfrm_state_alloc(void)
171 	struct xfrm_state *x;
173 	x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
176 		memset(x, 0, sizeof(struct xfrm_state));
177 		atomic_set(&x->refcnt, 1);
178 		atomic_set(&x->tunnel_users, 0);
179 		INIT_LIST_HEAD(&x->bydst);
180 		INIT_LIST_HEAD(&x->byspi);
181 		init_timer(&x->timer);
182 		x->timer.function = xfrm_timer_handler;
183 		x->timer.data	  = (unsigned long)x;
	/* creation time in wall-clock seconds, used by lifetime checks */
184 		x->curlft.add_time = (unsigned long)xtime.tv_sec;
185 		x->lft.soft_byte_limit = XFRM_INF;
186 		x->lft.soft_packet_limit = XFRM_INF;
187 		x->lft.hard_byte_limit = XFRM_INF;
188 		x->lft.hard_packet_limit = XFRM_INF;
189 		spin_lock_init(&x->lock);
/* Called when the last reference is dropped: queue the (already DEAD)
 * state on the GC list — reusing its now-unhashed bydst link — and kick
 * the GC work item to free it later in process context. */
194 void __xfrm_state_destroy(struct xfrm_state *x)
196 	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
198 	spin_lock_bh(&xfrm_state_gc_lock);
199 	list_add(&x->bydst, &xfrm_state_gc_list);
200 	spin_unlock_bh(&xfrm_state_gc_lock);
201 	schedule_work(&xfrm_state_gc_work);
/* Core deletion: mark the state DEAD, unhash it from both tables
 * (dropping the hash-table references), cancel its timer, and release
 * the original allocation reference.  Caller must hold x->lock
 * (see xfrm_state_delete below).
 * NOTE(review): truncated listing — the list_del() lines paired with the
 * refcount decrements are not visible here. */
204 static void __xfrm_state_delete(struct xfrm_state *x)
206 	if (x->km.state != XFRM_STATE_DEAD) {
207 		x->km.state = XFRM_STATE_DEAD;
208 		spin_lock(&xfrm_state_lock);
210 		atomic_dec(&x->refcnt);
213 			atomic_dec(&x->refcnt);
215 		spin_unlock(&xfrm_state_lock);
216 		if (del_timer(&x->timer))
217 			atomic_dec(&x->refcnt);
219 		/* The number two in this test is the reference
220 		 * mentioned in the comment below plus the reference
221 		 * our caller holds.  A larger value means that
222 		 * there are DSTs attached to this xfrm_state.
224 		if (atomic_read(&x->refcnt) > 2)
225 			xfrm_flush_bundles();
227 		/* All xfrm_state objects are created by xfrm_state_alloc.
228 		 * The xfrm_state_alloc call gives a reference, and that
229 		 * is what we are dropping here.
231 		atomic_dec(&x->refcnt);
/* Public deletion entry point: take the per-state lock (BH-safe) around
 * the lock-assuming __xfrm_state_delete(). */
235 void xfrm_state_delete(struct xfrm_state *x)
237 	spin_lock_bh(&x->lock);
238 	__xfrm_state_delete(x);
239 	spin_unlock_bh(&x->lock);
/* Delete every non-kernel-internal state whose protocol matches 'proto'
 * (IPSEC_PROTO_ANY matches all).  The global lock is dropped around each
 * xfrm_state_delete() call — which takes x->lock itself — and the hash
 * chain walk is restarted afterwards.
 * (Truncated view: the hold/put and restart lines are not visible.) */
242 void xfrm_state_flush(u8 proto)
245 	struct xfrm_state *x;
247 	spin_lock_bh(&xfrm_state_lock);
248 	for (i = 0; i < XFRM_DST_HSIZE; i++) {
250 		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
251 			if (!xfrm_state_kern(x) &&
252 			    (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
254 				spin_unlock_bh(&xfrm_state_lock);
256 				xfrm_state_delete(x);
259 				spin_lock_bh(&xfrm_state_lock);
264 	spin_unlock_bh(&xfrm_state_lock);
/* Initialize a temporary selector on an ACQUIRE state, delegating to the
 * per-address-family init_tempsel hook.  The afinfo read lock is held
 * across the call via get/put_afinfo.
 * (Truncated view: return type and the afinfo NULL check are missing.) */
269 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
270 		  struct xfrm_tmpl *tmpl,
271 		  xfrm_address_t *daddr, xfrm_address_t *saddr,
272 		  unsigned short family)
274 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
277 	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
278 	xfrm_state_put_afinfo(afinfo);
/* Resolve the SA to use for an outbound flow.  Scans the bydst hash
 * chain for a state matching family/reqid/addresses/mode/proto; among
 * VALID candidates whose selector matches, picks the "best" (least
 * dying, then oldest add_time).  If nothing usable exists and no
 * acquisition is already in flight, allocates a larval ACQ state,
 * queries the key managers, and arms its expiry timer.
 * NOTE(review): listing truncated — error-path and several bookkeeping
 * lines are not visible here. */
283 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
284 		struct flowi *fl, struct xfrm_tmpl *tmpl,
285 		struct xfrm_policy *pol, int *err,
286 		unsigned short family)
288 	unsigned h = xfrm_dst_hash(daddr, family);
289 	struct xfrm_state *x;
290 	int acquire_in_progress = 0;
292 	struct xfrm_state *best = NULL;
294 	spin_lock_bh(&xfrm_state_lock);
295 	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
296 		if (x->props.family == family &&
297 		    x->props.reqid == tmpl->reqid &&
298 		    xfrm_state_addr_check(x, daddr, saddr, family) &&
299 		    tmpl->mode == x->props.mode &&
300 		    tmpl->id.proto == x->id.proto) {
302 			   1. There is a valid state with matching selector.
304 			   2. Valid state with inappropriate selector. Skip.
306 			   Entering area of "sysdeps".
308 			   3. If state is not valid, selector is temporary,
309 			      it selects only session which triggered
310 			      previous resolution. Key manager will do
311 			      something to install a state with proper
314 			if (x->km.state == XFRM_STATE_VALID) {
315 				if (!xfrm_selector_match(&x->sel, fl, family))
				/* prefer less-dying, then older (smaller add_time wins) */
318 				    best->km.dying > x->km.dying ||
319 				    (best->km.dying == x->km.dying &&
320 				     best->curlft.add_time < x->curlft.add_time))
322 			} else if (x->km.state == XFRM_STATE_ACQ) {
323 				acquire_in_progress = 1;
324 			} else if (x->km.state == XFRM_STATE_ERROR ||
325 				   x->km.state == XFRM_STATE_EXPIRED) {
326 				if (xfrm_selector_match(&x->sel, fl, family))
	/* no usable SA: start an acquisition unless one is pending */
333 	if (!x && !error && !acquire_in_progress &&
334 	    ((x = xfrm_state_alloc()) != NULL)) {
335 		/* Initialize temporary selector matching only
336 		 * to current session. */
337 		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
339 		if (km_query(x, tmpl, pol) == 0) {
340 			x->km.state = XFRM_STATE_ACQ;
341 			list_add_tail(&x->bydst, xfrm_state_bydst+h);
344 				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
345 				list_add(&x->byspi, xfrm_state_byspi+h);
348 			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
350 			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
351 			add_timer(&x->timer);
		/* key manager refused: kill the larval state */
353 			x->km.state = XFRM_STATE_DEAD;
362 		*err = acquire_in_progress ? -EAGAIN :
363 			(error ? -ESRCH : -ENOMEM);
364 	spin_unlock_bh(&xfrm_state_lock);
/* Hash a fully-keyed state into both tables (bydst and, for states with
 * an SPI, byspi) and arm its timer one second out.  Caller holds
 * xfrm_state_lock.  (Truncated view: the hold/refcount lines between the
 * list_adds are not visible.) */
368 static void __xfrm_state_insert(struct xfrm_state *x)
370 	unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
372 	list_add(&x->bydst, xfrm_state_bydst+h);
375 	h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
377 	list_add(&x->byspi, xfrm_state_byspi+h);
380 	if (!mod_timer(&x->timer, jiffies + HZ))
/* Public insert: take the global state lock around __xfrm_state_insert. */
386 void xfrm_state_insert(struct xfrm_state *x)
388 	spin_lock_bh(&xfrm_state_lock);
389 	__xfrm_state_insert(x);
390 	spin_unlock_bh(&xfrm_state_lock);
393 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
/* Add a new SA.  Fails if an SA with the same (daddr, spi, proto)
 * already exists; otherwise locates any matching larval (ACQ) state —
 * by km sequence number or by find_acq — inserts the new state, and
 * deletes the larval one it replaces.
 * NOTE(review): truncated listing — error assignments, gotos and the
 * final return are not visible. */
395 int xfrm_state_add(struct xfrm_state *x)
397 	struct xfrm_state_afinfo *afinfo;
398 	struct xfrm_state *x1;
402 	family = x->props.family;
403 	afinfo = xfrm_state_get_afinfo(family);
404 	if (unlikely(afinfo == NULL))
405 		return -EAFNOSUPPORT;
407 	spin_lock_bh(&xfrm_state_lock);
409 	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
	/* try to find the larval state this SA resolves */
418 	x1 = __xfrm_find_acq_byseq(x->km.seq);
419 	if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
426 		x1 = afinfo->find_acq(
427 			x->props.mode, x->props.reqid, x->id.proto,
428 			&x->id.daddr, &x->props.saddr, 0);
430 	__xfrm_state_insert(x);
434 	spin_unlock_bh(&xfrm_state_lock);
435 	xfrm_state_put_afinfo(afinfo);
		/* retire the larval state that was standing in for this SA */
438 		xfrm_state_delete(x1);
/* Update an existing SA in place.  Looks up the matching state; refuses
 * to touch kernel-internal states; if the existing entry is larval
 * (ACQ), the new state simply replaces it.  Otherwise copies the
 * updatable fields (encap, lifetimes) into the live state under its
 * lock and re-checks expiry.
 * NOTE(review): truncated listing — error codes, gotos and returns are
 * not visible here. */
445 int xfrm_state_update(struct xfrm_state *x)
447 	struct xfrm_state_afinfo *afinfo;
448 	struct xfrm_state *x1;
451 	afinfo = xfrm_state_get_afinfo(x->props.family);
452 	if (unlikely(afinfo == NULL))
453 		return -EAFNOSUPPORT;
455 	spin_lock_bh(&xfrm_state_lock);
456 	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
462 	if (xfrm_state_kern(x1)) {
468 	if (x1->km.state == XFRM_STATE_ACQ) {
469 		__xfrm_state_insert(x);
475 	spin_unlock_bh(&xfrm_state_lock);
476 	xfrm_state_put_afinfo(afinfo);
		/* new state replaced the larval one: delete the old entry */
482 		xfrm_state_delete(x1);
488 	spin_lock_bh(&x1->lock);
489 	if (likely(x1->km.state == XFRM_STATE_VALID)) {
490 		if (x->encap && x1->encap)
491 			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
492 		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
495 		if (!mod_timer(&x1->timer, jiffies + HZ))
497 		if (x1->curlft.use_time)
498 			xfrm_state_check_expire(x1);
502 	spin_unlock_bh(&x1->lock);
/* Volume-based lifetime check, called on each use of the SA.  Stamps
 * first-use time, then compares byte/packet counters against the hard
 * limit (expire now, notify hard) and the soft limit (notify soft).
 * (Truncated view: the dying-flag handling and returns are missing.) */
509 int xfrm_state_check_expire(struct xfrm_state *x)
511 	if (!x->curlft.use_time)
512 		x->curlft.use_time = (unsigned long)xtime.tv_sec;
514 	if (x->km.state != XFRM_STATE_VALID)
517 	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
518 	    x->curlft.packets >= x->lft.hard_packet_limit) {
519 		km_state_expired(x, 1);
		/* grace period before the timer reaps the expired state */
520 		if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ))
526 	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
527 	     x->curlft.packets >= x->lft.soft_packet_limit))
528 		km_state_expired(x, 0);
/* Ensure the skb has enough headroom for this transform's header plus
 * the output device's link-layer reserve, expanding the head if not.
 * (Truncated view: the headroom comparison line is missing.) */
532 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
534 	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
538 		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
540 	/* Check tail too... */
/* Pre-output check for an SA: lifetime/expiry first, then skb headroom.
 * (Truncated view: the error short-circuit between the calls is missing.) */
544 int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
546 	int err = xfrm_state_check_expire(x);
549 	err = xfrm_state_check_space(x, skb);
/* Look up an SA by (daddr, spi, proto) via the per-family hook, under
 * the global state lock.  (Truncated view: return type, afinfo NULL
 * check and the return of x are missing.) */
555 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
556 		  unsigned short family)
558 	struct xfrm_state *x;
559 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
563 	spin_lock_bh(&xfrm_state_lock);
564 	x = afinfo->state_lookup(daddr, spi, proto);
565 	spin_unlock_bh(&xfrm_state_lock);
566 	xfrm_state_put_afinfo(afinfo);
/* Find (or, if 'create' is set, create) a larval acquisition state for
 * the given mode/reqid/proto/addresses via the per-family hook, under
 * the global state lock.  (Truncated view: return type and NULL check
 * are missing.) */
571 xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
572 	      xfrm_address_t *daddr, xfrm_address_t *saddr,
573 	      int create, unsigned short family)
575 	struct xfrm_state *x;
576 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
580 	spin_lock_bh(&xfrm_state_lock);
581 	x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
582 	spin_unlock_bh(&xfrm_state_lock);
583 	xfrm_state_put_afinfo(afinfo);
587 /* Silly enough, but I'm lazy to build resolution list */
/* Linear scan of every bydst chain for a state whose km.seq matches —
 * O(total states); caller holds xfrm_state_lock.
 * (Truncated view: the hold/return lines are missing.) */
589 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
592 	struct xfrm_state *x;
594 	for (i = 0; i < XFRM_DST_HSIZE; i++) {
595 		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
596 			if (x->km.seq == seq) {
/* Locked wrapper around __xfrm_find_acq_byseq.
 * (Truncated view: the return statement is missing.) */
605 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
607 	struct xfrm_state *x;
609 	spin_lock_bh(&xfrm_state_lock);
610 	x = __xfrm_find_acq_byseq(seq);
611 	spin_unlock_bh(&xfrm_state_lock);
/* Hand out the next non-zero acquisition sequence number under a local
 * spinlock; the `?:` skips 0 on wrap-around. */
615 u32 xfrm_get_acqseq(void)
619 	static spinlock_t acqseq_lock = SPIN_LOCK_UNLOCKED;
621 	spin_lock_bh(&acqseq_lock);
622 	res = (++acqseq ? : ++acqseq);
623 	spin_unlock_bh(&acqseq_lock);
/* Assign an SPI to a state.  For a fixed SPI (minspi == maxspi) just
 * verify it is free; otherwise probe random SPIs in [minspi, maxspi]
 * until an unused one is found, then hash the state into the byspi
 * table.  SPIs are handled in network byte order at the lookup boundary.
 * NOTE(review): truncated listing — loop exits, x0 puts and the return
 * are not visible. */
628 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
631 	struct xfrm_state *x0;
636 	if (minspi == maxspi) {
637 		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
645 		minspi = ntohl(minspi);
646 		maxspi = ntohl(maxspi);
		/* bounded number of random probes over the SPI range */
647 		for (h=0; h<maxspi-minspi+1; h++) {
648 			spi = minspi + net_random()%(maxspi-minspi+1);
649 			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
651 				x->id.spi = htonl(spi);
658 		spin_lock_bh(&xfrm_state_lock);
659 		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
660 		list_add(&x->byspi, xfrm_state_byspi+h);
662 		spin_unlock_bh(&xfrm_state_lock);
/* Iterate all states matching 'proto' under the global lock.  First
 * pass counts the matches, second pass invokes the callback with a
 * decreasing count (so the callback can tell which entry is last).
 * (Truncated view: count accumulation and error-exit lines missing.) */
667 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
671 	struct xfrm_state *x;
675 	spin_lock_bh(&xfrm_state_lock);
676 	for (i = 0; i < XFRM_DST_HSIZE; i++) {
677 		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
678 			if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
687 	for (i = 0; i < XFRM_DST_HSIZE; i++) {
688 		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
689 			if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
691 			err = func(x, --count, data);
697 	spin_unlock_bh(&xfrm_state_lock);
/* Anti-replay check for an inbound sequence number: reject seq 0,
 * accept anything newer than the window head, reject sequence numbers
 * older than the window or whose bitmap bit is already set.
 * (Truncated view: return statements and duplicate-stat lines missing.) */
702 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
708 	if (unlikely(seq == 0))
711 	if (likely(seq > x->replay.seq))
714 	diff = x->replay.seq - seq;
715 	if (diff >= x->props.replay_window) {
716 		x->stats.replay_window++;
720 	if (x->replay.bitmap & (1U << diff)) {
/* Advance the replay window after accepting 'seq': slide the bitmap
 * forward (or reset it if the jump exceeds the window) for new sequence
 * numbers, or just set the bit for an in-window older one. */
727 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
733 	if (seq > x->replay.seq) {
734 		diff = seq - x->replay.seq;
735 		if (diff < x->props.replay_window)
736 			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
			/* jump past the whole window: only 'seq' is marked seen */
738 			x->replay.bitmap = 1;
741 		diff = x->replay.seq - seq;
742 		x->replay.bitmap |= (1U << diff);
746 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
747 static rwlock_t xfrm_km_lock = RW_LOCK_UNLOCKED;
/* Notify all registered key managers that a state soft-expired (hard=0)
 * or hard-expired (hard=1); hard expiry flips km.state to EXPIRED.
 * (Truncated view: the dying-flag line and the per-km notify call body
 * are missing.) */
749 void km_state_expired(struct xfrm_state *x, int hard)
754 		x->km.state = XFRM_STATE_EXPIRED;
758 	read_lock(&xfrm_km_lock);
759 	list_for_each_entry(km, &xfrm_km_list, list)
761 	read_unlock(&xfrm_km_lock);
/* Ask each registered key manager to acquire keys for larval state 'x';
 * stops iterating per the (not visible) break condition on err.
 * (Truncated view: return statement missing.) */
767 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
772 	read_lock(&xfrm_km_lock);
773 	list_for_each_entry(km, &xfrm_km_list, list) {
774 		err = km->acquire(x, t, pol, XFRM_POLICY_OUT);
778 	read_unlock(&xfrm_km_lock);
/* Report a NAT-T address/port mapping change to key managers that
 * implement new_mapping.  (Truncated view: the capability check and
 * return are missing.) */
782 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
787 	read_lock(&xfrm_km_lock);
788 	list_for_each_entry(km, &xfrm_km_list, list) {
790 			err = km->new_mapping(x, ipaddr, sport);
794 	read_unlock(&xfrm_km_lock);
/* Broadcast a policy expiry (soft or hard) to every key manager that
 * registered a notify_policy hook. */
798 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
802 	read_lock(&xfrm_km_lock);
803 	list_for_each_entry(km, &xfrm_km_list, list)
804 		if (km->notify_policy)
805 			km->notify_policy(pol, dir, hard);
806 	read_unlock(&xfrm_km_lock);
/* setsockopt path for per-socket IPsec policy: bounds-check and copy the
 * user buffer, let a key manager compile it into an xfrm_policy, then
 * install it on the socket.
 * NOTE(review): truncated listing — error codes, the compile_policy
 * break condition, kfree and return are not visible. */
812 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
817 	struct xfrm_policy *pol = NULL;
	/* reject non-positive and over-page-size option buffers */
819 	if (optlen <= 0 || optlen > PAGE_SIZE)
822 	data = kmalloc(optlen, GFP_KERNEL);
827 	if (copy_from_user(data, optval, optlen))
831 	read_lock(&xfrm_km_lock);
832 	list_for_each_entry(km, &xfrm_km_list, list) {
833 		pol = km->compile_policy(sk->sk_family, optname, data,
838 	read_unlock(&xfrm_km_lock);
841 		xfrm_sk_policy_insert(sk, err, pol);
/* Register a key manager (e.g. af_key, xfrm_user) on the global list. */
851 int xfrm_register_km(struct xfrm_mgr *km)
853 	write_lock_bh(&xfrm_km_lock);
854 	list_add_tail(&km->list, &xfrm_km_list);
855 	write_unlock_bh(&xfrm_km_lock);
/* Unregister a key manager.  (Truncated view: the list_del line between
 * lock and unlock is missing.) */
859 int xfrm_unregister_km(struct xfrm_mgr *km)
861 	write_lock_bh(&xfrm_km_lock);
863 	write_unlock_bh(&xfrm_km_lock);
/* Register per-address-family state operations.  Rejects out-of-range
 * families and duplicate registration; on success points the afinfo at
 * the shared hash tables and publishes it in xfrm_state_afinfo[].
 * (Truncated view: the EINVAL/EEXIST error assignments are missing.) */
867 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
870 	if (unlikely(afinfo == NULL))
872 	if (unlikely(afinfo->family >= NPROTO))
873 		return -EAFNOSUPPORT;
874 	write_lock(&xfrm_state_afinfo_lock);
875 	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
878 		afinfo->state_bydst = xfrm_state_bydst;
879 		afinfo->state_byspi = xfrm_state_byspi;
880 		xfrm_state_afinfo[afinfo->family] = afinfo;
882 	write_unlock(&xfrm_state_afinfo_lock);
/* Unregister per-family state operations: verify the slot holds this
 * afinfo, then clear the slot and its table pointers.
 * (Truncated view: error assignments and return are missing.) */
886 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
889 	if (unlikely(afinfo == NULL))
891 	if (unlikely(afinfo->family >= NPROTO))
892 		return -EAFNOSUPPORT;
893 	write_lock(&xfrm_state_afinfo_lock);
894 	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
		/* refuse to unregister somebody else's afinfo */
895 		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
898 			xfrm_state_afinfo[afinfo->family] = NULL;
899 			afinfo->state_byspi = NULL;
900 			afinfo->state_bydst = NULL;
903 	write_unlock(&xfrm_state_afinfo_lock);
/* Fetch the afinfo for 'family' and return it with its own read lock
 * held (released by xfrm_state_put_afinfo), so it cannot be unregistered
 * while in use.  (Truncated view: the NULL returns are missing.) */
907 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
909 	struct xfrm_state_afinfo *afinfo;
910 	if (unlikely(family >= NPROTO))
912 	read_lock(&xfrm_state_afinfo_lock);
913 	afinfo = xfrm_state_afinfo[family];
914 	if (likely(afinfo != NULL))
915 		read_lock(&afinfo->lock);
916 	read_unlock(&xfrm_state_afinfo_lock);
/* Drop the per-afinfo read lock taken by xfrm_state_get_afinfo;
 * tolerates a NULL afinfo from a failed get. */
920 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
922 	if (unlikely(afinfo == NULL))
924 	read_unlock(&afinfo->lock);
927 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
/* Drop this state's reference on its tunnel state: when only the tunnel
 * itself plus this user remain (tunnel_users == 2), delete the tunnel,
 * then decrement the user count.  (Truncated view: the x->tunnel NULL
 * check and reset are missing.) */
928 void xfrm_state_delete_tunnel(struct xfrm_state *x)
931 		struct xfrm_state *t = x->tunnel;
933 		if (atomic_read(&t->tunnel_users) == 2)
934 			xfrm_state_delete(t);
935 		atomic_dec(&t->tunnel_users);
/* Boot-time init: empty both hash tables and wire the GC work item to
 * xfrm_state_gc_task.  (Definition may continue past this view.) */
941 void __init xfrm_state_init(void)
945 	for (i=0; i<XFRM_DST_HSIZE; i++) {
946 		INIT_LIST_HEAD(&xfrm_state_bydst[i]);
947 		INIT_LIST_HEAD(&xfrm_state_byspi[i]);
949 	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);