6 * Kazunori MIYAZAWA @USAGI
7 * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
9 * YOSHIFUJI Hideaki @USAGI
10 * Split up af-specific functions
11 * Derek Atkins <derek@ihtfp.com>
12 * Add UDP Encapsulation
16 #include <linux/workqueue.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <asm/uaccess.h>
22 /* Each xfrm_state may be linked to two tables:
24 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
25 2. Hash table by daddr to find what SAs exist for given
26 destination/tunnel endpoint. (output)
/* Guards both state hash tables below and every list walk over them. */
29 static spinlock_t xfrm_state_lock = SPIN_LOCK_UNLOCKED;
31 /* Hash table to find appropriate SA towards given target (endpoint
32 * of tunnel or destination of transport mode) allowed by selector.
34 * Main use is finding SA after policy selected tunnel or transport mode.
35 * Also, it can be used by ah/esp icmp error handler to find offending SA.
37 static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
38 static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
/* Key managers sleep here waiting for state/policy events (exported). */
40 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
/* Per-address-family state operations; slots indexed by family, guarded
 * by the rwlock so lookups can proceed concurrently with each other. */
42 static rwlock_t xfrm_state_afinfo_lock = RW_LOCK_UNLOCKED;
43 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
/* Deferred destruction: dead states are queued on gc_list (under
 * gc_lock) and freed later by the xfrm_state_gc_work workqueue item. */
45 static struct work_struct xfrm_state_gc_work;
46 static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
47 static spinlock_t xfrm_state_gc_lock = SPIN_LOCK_UNLOCKED;
/* Forward declaration: unlink/kill a state; see definition for locking. */
49 static void __xfrm_state_delete(struct xfrm_state *x);
/* Final teardown of one dead state: cancel its timer, then let the
 * per-protocol type (ah/esp/...) free its private data and drop the
 * type module reference.
 * NOTE(review): interior lines are elided in this view; freeing of the
 * state object itself presumably follows -- confirm in the full file. */
51 static void xfrm_state_gc_destroy(struct xfrm_state *x)
53 if (del_timer(&x->timer))
64 x->type->destructor(x);
65 xfrm_put_type(x->type);
/* Workqueue handler: atomically steal the entire pending gc list, then
 * destroy each queued state without holding the gc lock. */
71 static void xfrm_state_gc_task(void *data)
74 struct list_head *entry, *tmp;
75 struct list_head gc_list = LIST_HEAD_INIT(gc_list);
/* Splice under the lock so producers (__xfrm_state_destroy) cannot race. */
77 spin_lock_bh(&xfrm_state_gc_lock);
78 list_splice_init(&xfrm_state_gc_list, &gc_list);
79 spin_unlock_bh(&xfrm_state_gc_lock);
81 list_for_each_safe(entry, tmp, &gc_list) {
/* Dead states are chained through their (no longer hashed) bydst link. */
82 x = list_entry(entry, struct xfrm_state, bydst);
83 xfrm_state_gc_destroy(x);
/* Convert a timeout in seconds to jiffies, clamped so that secs*HZ can
 * never exceed MAX_SCHEDULE_TIMEOUT (avoids overflow in mod_timer math). */
87 static inline unsigned long make_jiffies(long secs)
89 if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
90 return MAX_SCHEDULE_TIMEOUT-1;
/* Per-state lifetime timer. Evaluates the four add/use soft/hard expiry
 * deadlines, notifies key managers via km_state_expired() (soft: hard=0,
 * hard: hard=1), and either re-arms the timer for the nearest deadline
 * or deletes the state once it has hard-expired. */
95 static void xfrm_timer_handler(unsigned long data)
97 struct xfrm_state *x = (struct xfrm_state*)data;
/* Lifetimes are kept in wall-clock seconds, hence xtime rather than jiffies. */
98 unsigned long now = (unsigned long)xtime.tv_sec;
103 if (x->km.state == XFRM_STATE_DEAD)
105 if (x->km.state == XFRM_STATE_EXPIRED)
/* Hard limits first: hitting one forces expiry. */
107 if (x->lft.hard_add_expires_seconds) {
108 long tmo = x->lft.hard_add_expires_seconds +
109 x->curlft.add_time - now;
115 if (x->lft.hard_use_expires_seconds) {
116 long tmo = x->lft.hard_use_expires_seconds +
/* A never-used state (use_time == 0) counts from "now". */
117 (x->curlft.use_time ? : now) - now;
/* Soft limits: only warn the key manager so it can renegotiate early. */
125 if (x->lft.soft_add_expires_seconds) {
126 long tmo = x->lft.soft_add_expires_seconds +
127 x->curlft.add_time - now;
133 if (x->lft.soft_use_expires_seconds) {
134 long tmo = x->lft.soft_use_expires_seconds +
135 (x->curlft.use_time ? : now) - now;
/* Soft expiry notification (hard=0). */
143 km_state_expired(x, 0);
/* Re-arm for the nearest remaining deadline, if any. */
145 if (next != LONG_MAX &&
146 !mod_timer(&x->timer, jiffies + make_jiffies(next)))
/* Hard expiry. An ACQ state without an SPI just dies quietly;
 * otherwise notify key managers (hard=1) before deleting. */
151 if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
152 x->km.state = XFRM_STATE_EXPIRED;
158 km_state_expired(x, 1);
159 __xfrm_state_delete(x);
162 spin_unlock(&x->lock);
/* Allocate a zeroed xfrm_state with one reference held by the caller.
 * The lifetime timer is initialized but not started, and byte/packet
 * limits default to "infinite" (XFRM_INF). GFP_ATOMIC makes this safe
 * to call from atomic (softirq) context. Returns NULL on allocation
 * failure (checked in the elided lines -- confirm in full file). */
166 struct xfrm_state *xfrm_state_alloc(void)
168 struct xfrm_state *x;
170 x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
173 memset(x, 0, sizeof(struct xfrm_state));
174 atomic_set(&x->refcnt, 1);
175 atomic_set(&x->tunnel_users, 0);
176 INIT_LIST_HEAD(&x->bydst);
177 INIT_LIST_HEAD(&x->byspi);
/* Timer fires xfrm_timer_handler with the state itself as argument. */
178 init_timer(&x->timer);
179 x->timer.function = xfrm_timer_handler;
180 x->timer.data = (unsigned long)x;
/* add_time anchors the add-expiry lifetimes (wall-clock seconds). */
181 x->curlft.add_time = (unsigned long)xtime.tv_sec;
182 x->lft.soft_byte_limit = XFRM_INF;
183 x->lft.soft_packet_limit = XFRM_INF;
184 x->lft.hard_byte_limit = XFRM_INF;
185 x->lft.hard_packet_limit = XFRM_INF;
186 x->lock = SPIN_LOCK_UNLOCKED;
/* Called when the last reference is dropped. The state must already be
 * DEAD (unhashed); it is queued for deferred destruction so the real
 * teardown (xfrm_state_gc_task) runs from the workqueue, not here. */
191 void __xfrm_state_destroy(struct xfrm_state *x)
193 BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
195 spin_lock_bh(&xfrm_state_gc_lock);
/* Reuse the now-unhashed bydst link to chain onto the gc list. */
196 list_add(&x->bydst, &xfrm_state_gc_list);
197 spin_unlock_bh(&xfrm_state_gc_lock);
198 schedule_work(&xfrm_state_gc_work);
/* Mark a state DEAD and tear down its external references: unhash it
 * from both tables (each unhash drops one refcount -- the unlink calls
 * are elided in this view, confirm in full file), cancel its pending
 * timer, flush any dst bundles still pinning it, and finally drop the
 * reference taken by xfrm_state_alloc(). Caller holds x->lock. */
201 static void __xfrm_state_delete(struct xfrm_state *x)
203 if (x->km.state != XFRM_STATE_DEAD) {
204 x->km.state = XFRM_STATE_DEAD;
205 spin_lock(&xfrm_state_lock);
/* Drop the reference held by the bydst hash membership. */
207 atomic_dec(&x->refcnt);
/* Drop the reference held by the byspi hash membership. */
210 atomic_dec(&x->refcnt);
212 spin_unlock(&xfrm_state_lock);
/* A pending timer also holds a reference; release it if we cancelled. */
213 if (del_timer(&x->timer))
214 atomic_dec(&x->refcnt);
216 /* The number two in this test is the reference
217 * mentioned in the comment below plus the reference
218 * our caller holds. A larger value means that
219 * there are DSTs attached to this xfrm_state.
221 if (atomic_read(&x->refcnt) > 2)
222 xfrm_flush_bundles();
224 /* All xfrm_state objects are created by xfrm_state_alloc.
225 * The xfrm_state_alloc call gives a reference, and that
226 * is what we are dropping here.
228 atomic_dec(&x->refcnt);
/* Public deletion entry point: first detach any tunnel state, then kill
 * the state itself under its own lock (BH-safe). */
232 void xfrm_state_delete(struct xfrm_state *x)
234 xfrm_state_delete_tunnel(x);
235 spin_lock_bh(&x->lock);
236 __xfrm_state_delete(x);
237 spin_unlock_bh(&x->lock);
/* Delete every non-kernel-internal state matching proto (or all states
 * when proto == IPSEC_PROTO_ANY). The table lock is dropped around each
 * xfrm_state_delete() call -- deletion takes x->lock and may sleep-free
 * resources -- then retaken; the elided lines presumably restart the
 * bucket scan after the unlock (confirm in full file). */
240 void xfrm_state_flush(u8 proto)
243 struct xfrm_state *x;
245 spin_lock_bh(&xfrm_state_lock);
246 for (i = 0; i < XFRM_DST_HSIZE; i++) {
248 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
/* Skip kernel-owned (tunnel-internal) states. */
249 if (!xfrm_state_kern(x) &&
250 (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
252 spin_unlock_bh(&xfrm_state_lock);
254 xfrm_state_delete(x);
257 spin_lock_bh(&xfrm_state_lock);
262 spin_unlock_bh(&xfrm_state_lock);
/* Initialize a temporary selector on x (used for ACQ states) by
 * dispatching to the address-family-specific init_tempsel hook.
 * Releases the afinfo read reference before returning. */
267 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
268 struct xfrm_tmpl *tmpl,
269 xfrm_address_t *daddr, xfrm_address_t *saddr,
270 unsigned short family)
272 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
275 afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
276 xfrm_state_put_afinfo(afinfo);
/* Resolve an SA for an outgoing flow: scan the bydst bucket for a state
 * matching the template (family, reqid, addresses, mode, proto), prefer
 * the freshest VALID state whose selector matches the flow, and if none
 * exists (and no acquire is already pending) create a larval ACQ state
 * and ask the key managers to negotiate it (km_query). On failure *err
 * is set to -EAGAIN (acquire pending), -ESRCH (error state matched) or
 * -ENOMEM. */
281 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
282 struct flowi *fl, struct xfrm_tmpl *tmpl,
283 struct xfrm_policy *pol, int *err,
284 unsigned short family)
286 unsigned h = xfrm_dst_hash(daddr, family);
287 struct xfrm_state *x;
288 int acquire_in_progress = 0;
290 struct xfrm_state *best = NULL;
292 spin_lock_bh(&xfrm_state_lock);
293 list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
294 if (x->props.family == family &&
295 x->props.reqid == tmpl->reqid &&
296 xfrm_state_addr_check(x, daddr, saddr, family) &&
297 tmpl->mode == x->props.mode &&
298 tmpl->id.proto == x->id.proto) {
300 1. There is a valid state with matching selector.
302 2. Valid state with inappropriate selector. Skip.
304 Entering area of "sysdeps".
306 3. If state is not valid, selector is temporary,
307 it selects only session which triggered
308 previous resolution. Key manager will do
309 something to install a state with proper
312 if (x->km.state == XFRM_STATE_VALID) {
313 if (!xfrm_selector_match(&x->sel, fl, family))
/* Prefer a non-dying state; among equals, the most recently added. */
316 best->km.dying > x->km.dying ||
317 (best->km.dying == x->km.dying &&
318 best->curlft.add_time < x->curlft.add_time))
320 } else if (x->km.state == XFRM_STATE_ACQ) {
321 acquire_in_progress = 1;
322 } else if (x->km.state == XFRM_STATE_ERROR ||
323 x->km.state == XFRM_STATE_EXPIRED) {
324 if (xfrm_selector_match(&x->sel, fl, family))
/* No usable state: build a larval one and kick off negotiation. */
331 if (!x && !error && !acquire_in_progress &&
332 ((x = xfrm_state_alloc()) != NULL)) {
333 /* Initialize temporary selector matching only
334 * to current session. */
335 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
337 if (km_query(x, tmpl, pol) == 0) {
338 x->km.state = XFRM_STATE_ACQ;
339 list_add_tail(&x->bydst, xfrm_state_bydst+h);
/* Hash by SPI too (elided condition presumably skips SPI==0 --
 * confirm in full file). */
342 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
343 list_add(&x->byspi, xfrm_state_byspi+h);
/* Larval states expire after XFRM_ACQ_EXPIRES seconds if the
 * key manager never completes the negotiation. */
346 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
348 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
349 add_timer(&x->timer);
/* km_query failed: abandon the larval state. */
351 x->km.state = XFRM_STATE_DEAD;
360 *err = acquire_in_progress ? -EAGAIN :
361 (error ? -ESRCH : -ENOMEM);
362 spin_unlock_bh(&xfrm_state_lock);
/* Hash a state into both tables and arm its timer one second out so the
 * lifetime checks start running promptly. Caller holds xfrm_state_lock;
 * refcount bumps for the list/timer references are in the elided lines
 * (confirm in full file). */
366 static void __xfrm_state_insert(struct xfrm_state *x)
368 unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
370 list_add(&x->bydst, xfrm_state_bydst+h);
373 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
375 list_add(&x->byspi, xfrm_state_byspi+h);
378 if (!mod_timer(&x->timer, jiffies + HZ))
/* Public wrapper: insert a state into the hash tables under the table
 * lock (BH-safe). */
384 void xfrm_state_insert(struct xfrm_state *x)
386 spin_lock_bh(&xfrm_state_lock);
387 __xfrm_state_insert(x);
388 spin_unlock_bh(&xfrm_state_lock);
/* Add a fully-keyed state. Fails (elided error path -- likely -EEXIST,
 * confirm in full file) if an identical (daddr,spi,proto) state already
 * exists; otherwise inserts x and, if a matching larval ACQ state (x1)
 * was found, deletes it after dropping the table lock since the new
 * state supersedes the pending acquire. */
391 int xfrm_state_add(struct xfrm_state *x)
393 struct xfrm_state_afinfo *afinfo;
394 struct xfrm_state *x1;
397 afinfo = xfrm_state_get_afinfo(x->props.family);
398 if (unlikely(afinfo == NULL))
399 return -EAFNOSUPPORT;
401 spin_lock_bh(&xfrm_state_lock);
/* Exact-match lookup by (daddr, spi, proto). */
403 x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
/* No exact match: look for a pending larval (acquire) state instead. */
405 x1 = afinfo->find_acq(
406 x->props.mode, x->props.reqid, x->id.proto,
407 &x->id.daddr, &x->props.saddr, 0);
408 if (x1 && x1->id.spi != x->id.spi && x1->id.spi) {
414 if (x1 && x1->id.spi) {
421 __xfrm_state_insert(x);
425 spin_unlock_bh(&xfrm_state_lock);
426 xfrm_state_put_afinfo(afinfo);
/* Replaced larval state is deleted outside the table lock. */
429 xfrm_state_delete(x1);
/* Update an existing state in place. Looks up the current holder x1 of
 * (daddr, spi, proto): kernel-internal states are refused; if x1 is a
 * larval ACQ state, x is inserted as its replacement and x1 is deleted;
 * otherwise x1's encap/lifetime data is refreshed from x under x1->lock
 * and its expiry is re-evaluated. */
436 int xfrm_state_update(struct xfrm_state *x)
438 struct xfrm_state_afinfo *afinfo;
439 struct xfrm_state *x1;
442 afinfo = xfrm_state_get_afinfo(x->props.family);
443 if (unlikely(afinfo == NULL))
444 return -EAFNOSUPPORT;
446 spin_lock_bh(&xfrm_state_lock);
447 x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
/* Kernel-owned (tunnel-internal) states may not be updated from here. */
453 if (xfrm_state_kern(x1)) {
/* A pending acquire is simply superseded by the fully-keyed x. */
459 if (x1->km.state == XFRM_STATE_ACQ) {
460 __xfrm_state_insert(x);
466 spin_unlock_bh(&xfrm_state_lock);
467 xfrm_state_put_afinfo(afinfo);
473 xfrm_state_delete(x1);
/* In-place refresh of a live state. */
479 spin_lock_bh(&x1->lock);
480 if (likely(x1->km.state == XFRM_STATE_VALID)) {
481 if (x->encap && x1->encap)
482 memcpy(x1->encap, x->encap, sizeof(*x1->encap));
483 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
/* Re-arm the timer so the new lifetimes are checked within a second. */
486 if (!mod_timer(&x1->timer, jiffies + HZ))
488 if (x1->curlft.use_time)
489 xfrm_state_check_expire(x1);
493 spin_unlock_bh(&x1->lock);
/* Check byte/packet lifetime limits on the datapath. Stamps first-use
 * time, and on a hard limit notifies key managers (hard=1) and arms the
 * timer for final teardown; on a soft limit only warns them (hard=0). */
500 int xfrm_state_check_expire(struct xfrm_state *x)
502 if (!x->curlft.use_time)
503 x->curlft.use_time = (unsigned long)xtime.tv_sec;
505 if (x->km.state != XFRM_STATE_VALID)
508 if (x->curlft.bytes >= x->lft.hard_byte_limit ||
509 x->curlft.packets >= x->lft.hard_packet_limit) {
510 km_state_expired(x, 1);
/* Give key managers XFRM_ACQ_EXPIRES seconds before the state dies. */
511 if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ))
517 (x->curlft.bytes >= x->lft.soft_byte_limit ||
518 x->curlft.packets >= x->lft.soft_packet_limit))
519 km_state_expired(x, 0);
/* Ensure the skb has enough headroom for this state's transform header
 * plus the output device's link-layer header, reallocating the head if
 * necessary. Returns pskb_expand_head()'s result when expansion is
 * needed (0 on success, negative errno otherwise). */
523 int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
525 int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
529 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
531 /* Check tail too... */
/* Look up a state by (daddr, spi, proto) via the family-specific hook,
 * under the table lock. The afinfo reference is released before return;
 * the returned state's refcounting is handled by the hook. */
536 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
537 unsigned short family)
539 struct xfrm_state *x;
540 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
544 spin_lock_bh(&xfrm_state_lock);
545 x = afinfo->state_lookup(daddr, spi, proto);
546 spin_unlock_bh(&xfrm_state_lock);
547 xfrm_state_put_afinfo(afinfo);
/* Find (or, when create != 0, create) a larval acquire state matching
 * (mode, reqid, proto, daddr, saddr) via the family-specific hook,
 * under the table lock. */
552 xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
553 xfrm_address_t *daddr, xfrm_address_t *saddr,
554 int create, unsigned short family)
556 struct xfrm_state *x;
557 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
561 spin_lock_bh(&xfrm_state_lock);
562 x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
563 spin_unlock_bh(&xfrm_state_lock);
564 xfrm_state_put_afinfo(afinfo);
568 /* Silly enough, but I'm lazy to build resolution list */
/* Linear scan of every bydst bucket for the state whose key-manager
 * sequence number matches seq. O(total states); refcount handling on
 * the hit is in the elided lines (confirm in full file). */
570 struct xfrm_state * xfrm_find_acq_byseq(u32 seq)
573 struct xfrm_state *x;
575 spin_lock_bh(&xfrm_state_lock);
576 for (i = 0; i < XFRM_DST_HSIZE; i++) {
577 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
578 if (x->km.seq == seq) {
580 spin_unlock_bh(&xfrm_state_lock);
585 spin_unlock_bh(&xfrm_state_lock);
/* Hand out a process-wide, never-zero acquire sequence number; the
 * "++x ? : ++x" idiom skips 0 on wraparound. Serialized by a local lock. */
589 u32 xfrm_get_acqseq(void)
593 static spinlock_t acqseq_lock = SPIN_LOCK_UNLOCKED;
595 spin_lock_bh(&acqseq_lock);
596 res = (++acqseq ? : ++acqseq);
597 spin_unlock_bh(&acqseq_lock);
/* Pick an unused SPI for x in [minspi, maxspi] (network byte order on
 * the wire, converted for arithmetic). minspi == maxspi requests that
 * exact value; otherwise up to range-many random probes are made, each
 * checked against the existing states. On success the state is hashed
 * into the byspi table. Collision handling of x0 (put/continue) is in
 * the elided lines -- confirm in full file. */
602 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
605 struct xfrm_state *x0;
/* Caller asked for one specific SPI: just test availability. */
610 if (minspi == maxspi) {
611 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
/* Random probing over the host-order range. */
619 minspi = ntohl(minspi);
620 maxspi = ntohl(maxspi);
621 for (h=0; h<maxspi-minspi+1; h++) {
622 spi = minspi + net_random()%(maxspi-minspi+1);
623 x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
628 x->id.spi = htonl(spi);
/* SPI chosen: make the state findable by SPI. */
631 spin_lock_bh(&xfrm_state_lock);
632 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
633 list_add(&x->byspi, xfrm_state_byspi+h);
635 spin_unlock_bh(&xfrm_state_lock);
/* Iterate all states matching proto (IPSEC_PROTO_ANY matches all) under
 * the table lock. Two passes: first count the matches, then invoke
 * func(x, --count, data) for each -- so the callback receives a
 * decreasing index, reaching 0 on the last state. Stops on a nonzero
 * return from func (error path elided). */
640 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
644 struct xfrm_state *x;
648 spin_lock_bh(&xfrm_state_lock);
/* Pass 1: count matching states. */
649 for (i = 0; i < XFRM_DST_HSIZE; i++) {
650 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
651 if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
/* Pass 2: deliver each match to the callback. */
660 for (i = 0; i < XFRM_DST_HSIZE; i++) {
661 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
662 if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
664 err = func(x, --count, data);
670 spin_unlock_bh(&xfrm_state_lock);
/* Anti-replay check for an inbound sequence number: seq 0 is invalid,
 * anything newer than the window head is accepted, anything older than
 * the window or already marked in the bitmap is a replay (and bumps the
 * corresponding stats counter). Return values are in elided lines. */
675 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
681 if (unlikely(seq == 0))
684 if (likely(seq > x->replay.seq))
687 diff = x->replay.seq - seq;
688 if (diff >= x->props.replay_window) {
689 x->stats.replay_window++;
/* Bit set => this sequence number was already seen. */
693 if (x->replay.bitmap & (1U << diff)) {
/* Advance the anti-replay window after accepting seq: a newer seq
 * shifts the bitmap forward (or resets it when the jump exceeds the
 * window) and becomes the new head; an in-window older seq just sets
 * its bit. */
700 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
706 if (seq > x->replay.seq) {
707 diff = seq - x->replay.seq;
708 if (diff < x->props.replay_window)
709 x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
/* Jump past the whole window: only the new head is marked seen. */
711 x->replay.bitmap = 1;
714 diff = x->replay.seq - seq;
715 x->replay.bitmap |= (1U << diff);
/* Verify that the flow fl matches the selector of every state in the
 * bundle x[0..n-1], each checked with its own address family. */
719 int xfrm_check_selectors(struct xfrm_state **x, int n, struct flowi *fl)
723 for (i=0; i<n; i++) {
725 match = xfrm_selector_match(&x[i]->sel, fl, x[i]->props.family);
/* Registered key managers (pfkey, netlink, ...); readers take the
 * rwlock for read on every notification, writers on (un)register. */
732 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
733 static rwlock_t xfrm_km_lock = RW_LOCK_UNLOCKED;
/* Notify all key managers that state x expired; hard != 0 additionally
 * forces the state into EXPIRED (the dying/expired bookkeeping around
 * line 740 is partially elided -- confirm in full file). */
735 void km_state_expired(struct xfrm_state *x, int hard)
740 x->km.state = XFRM_STATE_EXPIRED;
744 read_lock(&xfrm_km_lock);
745 list_for_each_entry(km, &xfrm_km_list, list)
747 read_unlock(&xfrm_km_lock);
/* Ask registered key managers to acquire keys for larval state x
 * (template t, policy pol, outbound direction); the loop exit on a
 * successful acquire is in the elided lines. */
753 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
758 read_lock(&xfrm_km_lock);
759 list_for_each_entry(km, &xfrm_km_list, list) {
760 err = km->acquire(x, t, pol, XFRM_POLICY_OUT);
764 read_unlock(&xfrm_km_lock);
/* Report a changed NAT-T peer mapping (new address/port) for state x to
 * key managers that implement the new_mapping hook. */
768 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
773 read_lock(&xfrm_km_lock);
774 list_for_each_entry(km, &xfrm_km_list, list) {
776 err = km->new_mapping(x, ipaddr, sport);
780 read_unlock(&xfrm_km_lock);
/* Notify key managers that policy pol (direction dir) expired; hard
 * distinguishes soft/hard expiry. Managers without a notify_policy
 * hook are skipped. */
784 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
788 read_lock(&xfrm_km_lock);
789 list_for_each_entry(km, &xfrm_km_list, list)
790 if (km->notify_policy)
791 km->notify_policy(pol, dir, hard);
792 read_unlock(&xfrm_km_lock);
/* setsockopt() path for per-socket IPsec policy: copy the user buffer
 * (bounded by PAGE_SIZE), let the first key manager that understands
 * the socket's family compile it into an xfrm_policy, and install the
 * result on the socket. Error handling / kfree of data are in elided
 * lines -- confirm in full file. */
798 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
803 struct xfrm_policy *pol = NULL;
805 if (optlen <= 0 || optlen > PAGE_SIZE)
808 data = kmalloc(optlen, GFP_KERNEL);
813 if (copy_from_user(data, optval, optlen))
817 read_lock(&xfrm_km_lock);
818 list_for_each_entry(km, &xfrm_km_list, list) {
819 pol = km->compile_policy(sk->sk_family, optname, data,
824 read_unlock(&xfrm_km_lock);
/* err at this point presumably carries the direction from
 * compile_policy -- confirm against the full function. */
827 xfrm_sk_policy_insert(sk, err, pol);
/* Register a key manager: append it to the global list under the write
 * lock (BH-safe). */
837 int xfrm_register_km(struct xfrm_mgr *km)
839 write_lock_bh(&xfrm_km_lock);
840 list_add_tail(&km->list, &xfrm_km_list);
841 write_unlock_bh(&xfrm_km_lock);
/* Unregister a key manager; the list_del itself is in the elided line
 * inside the locked section. */
845 int xfrm_unregister_km(struct xfrm_mgr *km)
847 write_lock_bh(&xfrm_km_lock);
849 write_unlock_bh(&xfrm_km_lock);
/* Register per-address-family state operations. Rejects NULL and
 * out-of-range families, refuses to overwrite an existing entry, and
 * hands the afinfo pointers to the shared hash tables. */
853 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
856 if (unlikely(afinfo == NULL))
858 if (unlikely(afinfo->family >= NPROTO))
859 return -EAFNOSUPPORT;
860 write_lock(&xfrm_state_afinfo_lock);
861 if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
/* Give the family code direct access to the shared hash tables. */
864 afinfo->state_bydst = xfrm_state_bydst;
865 afinfo->state_byspi = xfrm_state_byspi;
866 xfrm_state_afinfo[afinfo->family] = afinfo;
868 write_unlock(&xfrm_state_afinfo_lock);
/* Unregister per-family state operations. Only succeeds when the slot
 * currently holds this exact afinfo; on success the slot and the
 * afinfo's table pointers are cleared. */
872 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
875 if (unlikely(afinfo == NULL))
877 if (unlikely(afinfo->family >= NPROTO))
878 return -EAFNOSUPPORT;
879 write_lock(&xfrm_state_afinfo_lock);
880 if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
/* Someone else's afinfo occupies the slot: refuse (error path elided). */
881 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
884 xfrm_state_afinfo[afinfo->family] = NULL;
885 afinfo->state_byspi = NULL;
886 afinfo->state_bydst = NULL;
889 write_unlock(&xfrm_state_afinfo_lock);
/* Look up the afinfo for a family and return it with afinfo->lock held
 * for read (released by xfrm_state_put_afinfo), so the module cannot
 * disappear while in use. Returns NULL for bad/unregistered families
 * (return statements are in elided lines). */
893 struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
895 struct xfrm_state_afinfo *afinfo;
896 if (unlikely(family >= NPROTO))
898 read_lock(&xfrm_state_afinfo_lock);
899 afinfo = xfrm_state_afinfo[family];
/* Take the per-afinfo lock before dropping the table lock. */
900 if (likely(afinfo != NULL))
901 read_lock(&afinfo->lock);
902 read_unlock(&xfrm_state_afinfo_lock);
/* Release the read lock taken by xfrm_state_get_afinfo(); NULL-safe. */
906 void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
908 if (unlikely(afinfo == NULL))
910 read_unlock(&afinfo->lock);
913 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
/* Detach x from its tunnel state t: when we are the last user besides
 * the tunnel's own self-reference (tunnel_users == 2), delete t, then
 * drop our user count. The x->tunnel reset is in elided lines. */
914 void xfrm_state_delete_tunnel(struct xfrm_state *x)
917 struct xfrm_state *t = x->tunnel;
919 if (atomic_read(&t->tunnel_users) == 2)
920 xfrm_state_delete(t);
921 atomic_dec(&t->tunnel_users);
/* Boot-time setup: initialize every hash bucket in both tables and the
 * deferred-destruction work item (old 3-argument INIT_WORK API). */
927 void __init xfrm_state_init(void)
931 for (i=0; i<XFRM_DST_HSIZE; i++) {
932 INIT_LIST_HEAD(&xfrm_state_bydst[i]);
933 INIT_LIST_HEAD(&xfrm_state_byspi[i]);
935 INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);