6 * Kazunori MIYAZAWA @USAGI
7 * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
9 * YOSHIFUJI Hideaki @USAGI
10 * Split up af-specific functions
11 * Derek Atkins <derek@ihtfp.com>
12 * Add UDP Encapsulation
16 #include <linux/workqueue.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <asm/uaccess.h>
22 /* Each xfrm_state may be linked to two tables:
24 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
25 2. Hash table by daddr to find what SAs exist for given
26 destination/tunnel endpoint. (output)
/* Guards both hash tables below (bydst and byspi chains). */
29 static spinlock_t xfrm_state_lock = SPIN_LOCK_UNLOCKED;
31 /* Hash table to find appropriate SA towards given target (endpoint
32 * of tunnel or destination of transport mode) allowed by selector.
34 * Main use is finding SA after policy selected tunnel or transport mode.
35 * Also, it can be used by ah/esp icmp error handler to find offending SA.
/* The two hash tables described above; both sized XFRM_DST_HSIZE. */
37 static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
38 static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
/* Wait queue for key-manager events (exported, no static). */
40 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
/* Per-address-family ops table, indexed by family, under an rwlock. */
42 static rwlock_t xfrm_state_afinfo_lock = RW_LOCK_UNLOCKED;
43 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
/* Deferred destruction: dead states are queued on xfrm_state_gc_list
 * (under xfrm_state_gc_lock) and torn down from a workqueue. */
45 static struct work_struct xfrm_state_gc_work;
46 static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
47 static spinlock_t xfrm_state_gc_lock = SPIN_LOCK_UNLOCKED;
49 static void __xfrm_state_delete(struct xfrm_state *x);
/* Final teardown of a state pulled off the gc list: cancel its timer,
 * run the type-specific destructor and drop the xfrm_type reference.
 * Only dead states reach here (queued by __xfrm_state_destroy). */
51 static void xfrm_state_gc_destroy(struct xfrm_state *x)
53 if (del_timer(&x->timer))
/* Type-specific cleanup (e.g. key material), then release the
 * module reference held on x->type. */
64 x->type->destructor(x);
65 xfrm_put_type(x->type);
/* Workqueue handler for deferred state destruction: atomically splice
 * the pending list onto a private one, then destroy each entry outside
 * the gc lock. States are chained through their bydst list_head here. */
70 static void xfrm_state_gc_task(void *data)
73 struct list_head *entry, *tmp;
74 struct list_head gc_list = LIST_HEAD_INIT(gc_list);
/* Grab everything queued so far; new deaths re-queue for the next run. */
76 spin_lock_bh(&xfrm_state_gc_lock);
77 list_splice_init(&xfrm_state_gc_list, &gc_list);
78 spin_unlock_bh(&xfrm_state_gc_lock);
80 list_for_each_safe(entry, tmp, &gc_list) {
81 x = list_entry(entry, struct xfrm_state, bydst);
82 xfrm_state_gc_destroy(x);
/* Convert a relative timeout in seconds to jiffies, clamped so the
 * result never reaches MAX_SCHEDULE_TIMEOUT. */
87 static inline unsigned long make_jiffies(long secs)
89 if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
90 return MAX_SCHEDULE_TIMEOUT-1;
/* Per-state lifetime timer. Checks each configured add/use lifetime
 * limit against wall-clock seconds (xtime.tv_sec): hard limits expire
 * the state, soft limits only notify the key manager, then the timer
 * is re-armed for the nearest remaining deadline. Runs under x->lock
 * (the unlock is visible below; the lock acquisition is in elided
 * lines — NOTE(review): confirm against the full source). */
95 static void xfrm_timer_handler(unsigned long data)
97 struct xfrm_state *x = (struct xfrm_state*)data;
98 unsigned long now = (unsigned long)xtime.tv_sec;
/* A dead state needs no lifetime processing. */
103 if (x->km.state == XFRM_STATE_DEAD)
105 if (x->km.state == XFRM_STATE_EXPIRED)
/* Hard limits: time since creation (add) and since first use. */
107 if (x->lft.hard_add_expires_seconds) {
108 long tmo = x->lft.hard_add_expires_seconds +
109 x->curlft.add_time - now;
115 if (x->lft.hard_use_expires_seconds) {
/* GCC "?:" extension: if the SA was never used, measure from now
 * (i.e. tmo is the full use lifetime). */
116 long tmo = x->lft.hard_use_expires_seconds +
117 (x->curlft.use_time ? : now) - now;
/* Soft limits: same computation, but expiry only warns the KM. */
125 if (x->lft.soft_add_expires_seconds) {
126 long tmo = x->lft.soft_add_expires_seconds +
127 x->curlft.add_time - now;
133 if (x->lft.soft_use_expires_seconds) {
134 long tmo = x->lft.soft_use_expires_seconds +
135 (x->curlft.use_time ? : now) - now;
/* Soft expiry: hard=0 tells the key manager to start rekeying. */
143 km_state_expired(x, 0);
/* Re-arm for the closest future deadline, if any remains. */
145 if (next != LONG_MAX &&
146 !mod_timer(&x->timer, jiffies + make_jiffies(next)))
/* An ACQUIRE placeholder that never received an SPI times out here. */
151 if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
152 x->km.state = XFRM_STATE_EXPIRED;
/* Hard expiry: notify KM (hard=1) and delete the state. */
158 km_state_expired(x, 1);
159 __xfrm_state_delete(x);
162 spin_unlock(&x->lock);
/* Allocate and initialise a zeroed xfrm_state with refcount 1.
 * Byte/packet limits default to XFRM_INF (unlimited); the per-state
 * timer is set up to run xfrm_timer_handler with the state as its
 * argument. GFP_ATOMIC because callers may hold spinlocks.
 * Returns NULL on allocation failure (check is in elided lines). */
166 struct xfrm_state *xfrm_state_alloc(void)
168 struct xfrm_state *x;
170 x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
173 memset(x, 0, sizeof(struct xfrm_state));
174 atomic_set(&x->refcnt, 1);
175 atomic_set(&x->tunnel_users, 0);
176 INIT_LIST_HEAD(&x->bydst);
177 INIT_LIST_HEAD(&x->byspi);
178 init_timer(&x->timer);
179 x->timer.function = xfrm_timer_handler;
180 x->timer.data = (unsigned long)x;
/* Creation time in wall-clock seconds, used by lifetime checks. */
181 x->curlft.add_time = (unsigned long)xtime.tv_sec;
182 x->lft.soft_byte_limit = XFRM_INF;
183 x->lft.soft_packet_limit = XFRM_INF;
184 x->lft.hard_byte_limit = XFRM_INF;
185 x->lft.hard_packet_limit = XFRM_INF;
186 x->lock = SPIN_LOCK_UNLOCKED;
/* Last-reference destruction path: the state must already be DEAD.
 * Actual teardown is deferred — the state is queued on the gc list
 * (reusing its bydst linkage, which is no longer in a hash chain)
 * and freed later by xfrm_state_gc_task. */
191 void __xfrm_state_destroy(struct xfrm_state *x)
193 BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
195 spin_lock_bh(&xfrm_state_gc_lock);
196 list_add(&x->bydst, &xfrm_state_gc_list);
197 spin_unlock_bh(&xfrm_state_gc_lock);
198 schedule_work(&xfrm_state_gc_work);
/* Mark a state DEAD and unlink it from the hash tables, dropping one
 * reference per list it was removed from and one for a pending timer.
 * Caller holds x->lock (see xfrm_state_delete). Idempotent: the DEAD
 * check makes a second call a no-op. */
201 static void __xfrm_state_delete(struct xfrm_state *x)
203 if (x->km.state != XFRM_STATE_DEAD) {
204 x->km.state = XFRM_STATE_DEAD;
/* Unlink from bydst/byspi under the table lock; each unlink
 * releases the reference the table held (the list_del calls are
 * in elided lines). */
205 spin_lock(&xfrm_state_lock);
207 atomic_dec(&x->refcnt);
210 atomic_dec(&x->refcnt);
212 spin_unlock(&xfrm_state_lock);
/* A successfully cancelled timer also held a reference. */
213 if (del_timer(&x->timer))
214 atomic_dec(&x->refcnt);
216 /* The number two in this test is the reference
217 * mentioned in the comment below plus the reference
218 * our caller holds. A larger value means that
219 * there are DSTs attached to this xfrm_state.
221 if (atomic_read(&x->refcnt) > 2)
222 xfrm_flush_bundles();
224 /* All xfrm_state objects are created by xfrm_state_alloc.
225 * The xfrm_state_alloc call gives a reference, and that
226 * is what we are dropping here.
228 atomic_dec(&x->refcnt);
/* Public deletion entry point: takes the per-state lock (BH-safe)
 * around the unlocked worker above. */
232 void xfrm_state_delete(struct xfrm_state *x)
234 spin_lock_bh(&x->lock);
235 __xfrm_state_delete(x);
236 spin_unlock_bh(&x->lock);
/* Delete every non-kernel-owned state matching proto (or all states
 * when proto is IPSEC_PROTO_ANY). The table lock is dropped around
 * each xfrm_state_delete call — deletion takes x->lock and may
 * sleep-free flush bundles — and re-taken afterwards, presumably
 * restarting the chain walk (restart logic is in elided lines —
 * NOTE(review): confirm against the full source). */
239 void xfrm_state_flush(u8 proto)
242 struct xfrm_state *x;
244 spin_lock_bh(&xfrm_state_lock);
245 for (i = 0; i < XFRM_DST_HSIZE; i++) {
247 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
248 if (!xfrm_state_kern(x) &&
249 (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
251 spin_unlock_bh(&xfrm_state_lock);
253 xfrm_state_delete(x);
256 spin_lock_bh(&xfrm_state_lock);
261 spin_unlock_bh(&xfrm_state_lock);
/* Initialise a temporary selector on an ACQUIRE placeholder state,
 * delegating to the per-family afinfo hook. The afinfo reference is
 * released before returning. */
266 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
267 struct xfrm_tmpl *tmpl,
268 xfrm_address_t *daddr, xfrm_address_t *saddr,
269 unsigned short family)
271 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
274 afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
275 xfrm_state_put_afinfo(afinfo);
/* Core SA resolution for output: find (or start acquiring) a state
 * matching the template/flow. Scans the bydst hash chain for a VALID
 * state whose selector matches, preferring the freshest non-dying one.
 * If nothing usable exists and no acquire is already in progress, an
 * ACQ placeholder state is allocated, a query is sent to the key
 * manager, and the placeholder is hashed in with an XFRM_ACQ_EXPIRES
 * hard lifetime. On failure *err is set to -EAGAIN (acquire pending),
 * -ESRCH (matching state in error/expired) or -ENOMEM. */
280 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
281 struct flowi *fl, struct xfrm_tmpl *tmpl,
282 struct xfrm_policy *pol, int *err,
283 unsigned short family)
285 unsigned h = xfrm_dst_hash(daddr, family);
286 struct xfrm_state *x;
287 int acquire_in_progress = 0;
289 struct xfrm_state *best = NULL;
291 spin_lock_bh(&xfrm_state_lock);
292 list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
/* Candidate must agree on family, reqid, addresses, mode, proto. */
293 if (x->props.family == family &&
294 x->props.reqid == tmpl->reqid &&
295 xfrm_state_addr_check(x, daddr, saddr, family) &&
296 tmpl->mode == x->props.mode &&
297 tmpl->id.proto == x->id.proto) {
299 1. There is a valid state with matching selector.
301 2. Valid state with inappropriate selector. Skip.
303 Entering area of "sysdeps".
305 3. If state is not valid, selector is temporary,
306 it selects only session which triggered
307 previous resolution. Key manager will do
308 something to install a state with proper
/* Prefer a non-dying state; among equals, the most recently
 * added one wins. */
311 if (x->km.state == XFRM_STATE_VALID) {
312 if (!xfrm_selector_match(&x->sel, fl, family))
315 best->km.dying > x->km.dying ||
316 (best->km.dying == x->km.dying &&
317 best->curlft.add_time < x->curlft.add_time))
319 } else if (x->km.state == XFRM_STATE_ACQ) {
320 acquire_in_progress = 1;
321 } else if (x->km.state == XFRM_STATE_ERROR ||
322 x->km.state == XFRM_STATE_EXPIRED) {
323 if (xfrm_selector_match(&x->sel, fl, family))
/* No usable state: start an acquire unless one is pending. */
330 if (!x && !error && !acquire_in_progress &&
331 ((x = xfrm_state_alloc()) != NULL)) {
332 /* Initialize temporary selector matching only
333 * to current session. */
334 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
336 if (km_query(x, tmpl, pol) == 0) {
337 x->km.state = XFRM_STATE_ACQ;
338 list_add_tail(&x->bydst, xfrm_state_bydst+h);
/* Hash by SPI too (placeholder SPI may be 0). */
341 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
342 list_add(&x->byspi, xfrm_state_byspi+h);
/* Bound how long the placeholder may wait for the KM. */
345 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
347 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
348 add_timer(&x->timer);
/* KM query failed: kill the placeholder. */
350 x->km.state = XFRM_STATE_DEAD;
359 *err = acquire_in_progress ? -EAGAIN :
360 (error ? -ESRCH : -ENOMEM);
361 spin_unlock_bh(&xfrm_state_lock);
/* Insert a state into both hash tables (bydst by destination address,
 * byspi by SPI/proto) and arm its timer one second out to kick off
 * lifetime accounting. Caller holds xfrm_state_lock. */
365 static void __xfrm_state_insert(struct xfrm_state *x)
367 unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
369 list_add(&x->bydst, xfrm_state_bydst+h);
372 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
374 list_add(&x->byspi, xfrm_state_byspi+h);
/* mod_timer returning 0 means the timer was not already pending. */
377 if (!mod_timer(&x->timer, jiffies + HZ))
/* Public insert: wraps __xfrm_state_insert in the table lock. */
383 void xfrm_state_insert(struct xfrm_state *x)
385 spin_lock_bh(&xfrm_state_lock);
386 __xfrm_state_insert(x);
387 spin_unlock_bh(&xfrm_state_lock);
390 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
/* Add a fully-specified state. Fails if an identical (daddr,spi,proto)
 * state already exists (duplicate lookup below). Otherwise tries to
 * locate the ACQ placeholder that triggered this add — first by the
 * KM sequence number, then by (mode,reqid,proto,addresses) — inserts
 * the new state, and deletes the superseded placeholder outside the
 * table lock. Returns 0 or a negative errno (paths partly in elided
 * lines — NOTE(review): confirm error returns against full source). */
392 int xfrm_state_add(struct xfrm_state *x)
394 struct xfrm_state_afinfo *afinfo;
395 struct xfrm_state *x1;
399 family = x->props.family;
400 afinfo = xfrm_state_get_afinfo(family);
401 if (unlikely(afinfo == NULL))
402 return -EAFNOSUPPORT;
404 spin_lock_bh(&xfrm_state_lock);
/* Exact duplicate check by (daddr, spi, proto). */
406 x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
/* A seq match with the wrong daddr is not our placeholder. */
415 x1 = __xfrm_find_acq_byseq(x->km.seq);
416 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
423 x1 = afinfo->find_acq(
424 x->props.mode, x->props.reqid, x->id.proto,
425 &x->id.daddr, &x->props.saddr, 0);
427 __xfrm_state_insert(x);
431 spin_unlock_bh(&xfrm_state_lock);
432 xfrm_state_put_afinfo(afinfo);
/* Retire the placeholder after dropping the table lock. */
435 xfrm_state_delete(x1);
/* Update an existing state in place. Looks up the current state by
 * (daddr, spi, proto); kernel-owned states are refused. If the match
 * is an ACQ placeholder, the new state is simply inserted (and the
 * placeholder deleted below). Otherwise the existing VALID state gets
 * the new encap parameters and lifetimes copied in under its own lock
 * and its timer/expiry re-evaluated. Returns a negative errno on
 * failure (paths partly in elided lines). */
442 int xfrm_state_update(struct xfrm_state *x)
444 struct xfrm_state_afinfo *afinfo;
445 struct xfrm_state *x1;
448 afinfo = xfrm_state_get_afinfo(x->props.family);
449 if (unlikely(afinfo == NULL))
450 return -EAFNOSUPPORT;
452 spin_lock_bh(&xfrm_state_lock);
453 x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
/* Kernel-owned (tunnel-internal) states may not be replaced. */
459 if (xfrm_state_kern(x1)) {
465 if (x1->km.state == XFRM_STATE_ACQ) {
466 __xfrm_state_insert(x);
472 spin_unlock_bh(&xfrm_state_lock);
473 xfrm_state_put_afinfo(afinfo);
479 xfrm_state_delete(x1);
/* In-place update of the live state, under its per-state lock. */
485 spin_lock_bh(&x1->lock);
486 if (likely(x1->km.state == XFRM_STATE_VALID)) {
487 if (x->encap && x1->encap)
488 memcpy(x1->encap, x->encap, sizeof(*x1->encap));
489 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
492 if (!mod_timer(&x1->timer, jiffies + HZ))
494 if (x1->curlft.use_time)
495 xfrm_state_check_expire(x1);
499 spin_unlock_bh(&x1->lock);
/* Byte/packet lifetime enforcement, called on the data path. Stamps
 * first-use time, then: crossing a hard limit notifies the KM with
 * hard=1 and schedules quick expiry via the timer; crossing only a
 * soft limit notifies with hard=0 (rekey hint). Non-VALID states are
 * rejected early (return values are in elided lines). */
506 int xfrm_state_check_expire(struct xfrm_state *x)
508 if (!x->curlft.use_time)
509 x->curlft.use_time = (unsigned long)xtime.tv_sec;
511 if (x->km.state != XFRM_STATE_VALID)
514 if (x->curlft.bytes >= x->lft.hard_byte_limit ||
515 x->curlft.packets >= x->lft.hard_packet_limit) {
516 km_state_expired(x, 1);
517 if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ))
/* Soft limits: warn the key manager only. */
523 (x->curlft.bytes >= x->lft.soft_byte_limit ||
524 x->curlft.packets >= x->lft.soft_packet_limit))
525 km_state_expired(x, 0);
/* Ensure the skb has enough headroom for this transform's header plus
 * the output device's link-layer reserve; expand the head if not. */
529 int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
531 int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
535 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
537 /* Check tail too... */
/* Combined per-packet check: lifetime expiry first, then skb headroom. */
541 int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
543 int err = xfrm_state_check_expire(x);
546 err = xfrm_state_check_space(x, skb);
/* Look up a state by (daddr, spi, proto) via the per-family afinfo
 * hook, under the table lock. Returns the state or NULL (return is
 * in an elided line); lookup itself is expected to take a reference
 * — NOTE(review): confirm refcount behavior in afinfo->state_lookup. */
552 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
553 unsigned short family)
555 struct xfrm_state *x;
556 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
560 spin_lock_bh(&xfrm_state_lock);
561 x = afinfo->state_lookup(daddr, spi, proto);
562 spin_unlock_bh(&xfrm_state_lock);
563 xfrm_state_put_afinfo(afinfo);
/* Find an ACQUIRE placeholder matching (mode, reqid, proto, daddr,
 * saddr), optionally creating one when 'create' is nonzero, via the
 * per-family afinfo hook under the table lock. */
568 xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
569 xfrm_address_t *daddr, xfrm_address_t *saddr,
570 int create, unsigned short family)
572 struct xfrm_state *x;
573 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
577 spin_lock_bh(&xfrm_state_lock);
578 x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
579 spin_unlock_bh(&xfrm_state_lock);
580 xfrm_state_put_afinfo(afinfo);
584 /* Silly enough, but I'm lazy to build resolution list */
/* Linear scan of every bydst chain for a state whose key-manager
 * sequence number matches. Caller holds xfrm_state_lock. */
586 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
589 struct xfrm_state *x;
591 for (i = 0; i < XFRM_DST_HSIZE; i++) {
592 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
593 if (x->km.seq == seq) {
/* Locked wrapper around the sequence-number scan above. */
602 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
604 struct xfrm_state *x;
606 spin_lock_bh(&xfrm_state_lock);
607 x = __xfrm_find_acq_byseq(seq);
608 spin_unlock_bh(&xfrm_state_lock);
/* Hand out the next nonzero acquire sequence number. The GCC "?:"
 * idiom skips 0 on counter wrap; a local static lock serializes the
 * increment. */
612 u32 xfrm_get_acqseq(void)
616 static spinlock_t acqseq_lock = SPIN_LOCK_UNLOCKED;
618 spin_lock_bh(&acqseq_lock);
619 res = (++acqseq ? : ++acqseq);
620 spin_unlock_bh(&acqseq_lock);
/* Pick an unused SPI for state x within [minspi, maxspi] (network
 * byte order on the wire, converted to host order for the random
 * search). If minspi == maxspi the single candidate is checked
 * directly; otherwise random probes are made until a free SPI is
 * found or the attempt budget runs out. On success the state is
 * hashed into the byspi table. NOTE(review): xfrm_state_lookup
 * returns a referenced state on collision; the matching put is in
 * elided lines — confirm against the full source. */
625 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
628 struct xfrm_state *x0;
633 if (minspi == maxspi) {
634 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
642 minspi = ntohl(minspi);
643 maxspi = ntohl(maxspi);
/* Up to range-size random probes for a free SPI. */
644 for (h=0; h<maxspi-minspi+1; h++) {
645 spi = minspi + net_random()%(maxspi-minspi+1);
646 x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
648 x->id.spi = htonl(spi);
/* Got one: link into the byspi hash under the table lock. */
655 spin_lock_bh(&xfrm_state_lock);
656 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
657 list_add(&x->byspi, xfrm_state_byspi+h);
659 spin_unlock_bh(&xfrm_state_lock);
/* Iterate all states matching proto, invoking func on each. Two
 * passes under the table lock: the first counts the matches, the
 * second calls func(x, --count, data) so the callback sees a
 * descending index (0 on the last entry). A nonzero return from
 * func aborts the walk (handling is in elided lines). */
664 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
668 struct xfrm_state *x;
672 spin_lock_bh(&xfrm_state_lock);
/* Pass 1: count matching states. */
673 for (i = 0; i < XFRM_DST_HSIZE; i++) {
674 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
675 if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
/* Pass 2: deliver each match to the callback. */
684 for (i = 0; i < XFRM_DST_HSIZE; i++) {
685 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
686 if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
688 err = func(x, --count, data);
694 spin_unlock_bh(&xfrm_state_lock);
/* Anti-replay acceptance test for inbound sequence number seq.
 * seq 0 is invalid; anything newer than the highest seen is fine;
 * older packets are accepted only if inside the replay window and
 * not already recorded in the bitmap. Failure paths bump stats and
 * return an error (returns are in elided lines). */
699 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
705 if (unlikely(seq == 0))
708 if (likely(seq > x->replay.seq))
/* Older than the newest seen: check window and duplicate bit. */
711 diff = x->replay.seq - seq;
712 if (diff >= x->props.replay_window) {
713 x->stats.replay_window++;
717 if (x->replay.bitmap & (1U << diff)) {
/* Record an accepted sequence number in the replay state. A newer
 * seq shifts the bitmap forward and sets bit 0 (a jump beyond the
 * window resets the bitmap to just this packet); an older-but-valid
 * seq sets its bit within the current window. */
724 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
730 if (seq > x->replay.seq) {
731 diff = seq - x->replay.seq;
732 if (diff < x->props.replay_window)
733 x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
735 x->replay.bitmap = 1;
/* seq is within the window behind replay.seq: mark its slot. */
738 diff = x->replay.seq - seq;
739 x->replay.bitmap |= (1U << diff);
/* Verify that the flow fl matches the selector of each of the n
 * states in the bundle array x. */
743 int xfrm_check_selectors(struct xfrm_state **x, int n, struct flowi *fl)
747 for (i=0; i<n; i++) {
749 match = xfrm_selector_match(&x[i]->sel, fl, x[i]->props.family);
/* Registered key managers (e.g. pfkey, netlink), guarded by rwlock. */
756 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
757 static rwlock_t xfrm_km_lock = RW_LOCK_UNLOCKED;
/* Notify every registered key manager that state x has expired.
 * hard != 0 marks a hard expiry (state becomes EXPIRED); hard == 0
 * is a soft-limit rekey hint. */
759 void km_state_expired(struct xfrm_state *x, int hard)
764 x->km.state = XFRM_STATE_EXPIRED;
768 read_lock(&xfrm_km_lock);
769 list_for_each_entry(km, &xfrm_km_list, list)
771 read_unlock(&xfrm_km_lock);
/* Ask the registered key managers to acquire keys for placeholder
 * state x against template t and policy pol (outbound direction).
 * Managers are tried in registration order. */
777 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
782 read_lock(&xfrm_km_lock);
783 list_for_each_entry(km, &xfrm_km_list, list) {
784 err = km->acquire(x, t, pol, XFRM_POLICY_OUT);
788 read_unlock(&xfrm_km_lock);
/* Report a NAT-T address/port mapping change for state x to the
 * registered key managers. */
792 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
797 read_lock(&xfrm_km_lock);
798 list_for_each_entry(km, &xfrm_km_list, list) {
800 err = km->new_mapping(x, ipaddr, sport);
804 read_unlock(&xfrm_km_lock);
/* Notify key managers that a policy has expired; only managers that
 * implement the optional notify_policy hook are called. */
808 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
812 read_lock(&xfrm_km_lock);
813 list_for_each_entry(km, &xfrm_km_list, list)
814 if (km->notify_policy)
815 km->notify_policy(pol, dir, hard);
816 read_unlock(&xfrm_km_lock);
/* setsockopt path: compile a per-socket IPsec policy from a
 * user-supplied blob. The buffer (bounded by PAGE_SIZE) is copied in,
 * then each key manager is offered the data via compile_policy until
 * one produces a policy, which is installed on the socket. Errors and
 * cleanup paths are in elided lines. */
822 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
827 struct xfrm_policy *pol = NULL;
/* Reject empty or oversized option buffers. */
829 if (optlen <= 0 || optlen > PAGE_SIZE)
832 data = kmalloc(optlen, GFP_KERNEL);
837 if (copy_from_user(data, optval, optlen))
841 read_lock(&xfrm_km_lock);
842 list_for_each_entry(km, &xfrm_km_list, list) {
843 pol = km->compile_policy(sk->sk_family, optname, data,
848 read_unlock(&xfrm_km_lock);
/* err carries the direction here (set in elided lines —
 * NOTE(review): confirm against the full source). */
851 xfrm_sk_policy_insert(sk, err, pol);
/* Register a key manager at the tail of the notification list. */
861 int xfrm_register_km(struct xfrm_mgr *km)
863 write_lock_bh(&xfrm_km_lock);
864 list_add_tail(&km->list, &xfrm_km_list);
865 write_unlock_bh(&xfrm_km_lock);
/* Unregister a key manager (the list_del is in an elided line). */
869 int xfrm_unregister_km(struct xfrm_mgr *km)
871 write_lock_bh(&xfrm_km_lock);
873 write_unlock_bh(&xfrm_km_lock);
/* Register per-address-family state operations. Rejects a NULL or
 * out-of-range afinfo and refuses to overwrite an existing entry;
 * on success the shared hash tables are handed to the afinfo and it
 * is published in the family-indexed array. */
877 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
880 if (unlikely(afinfo == NULL))
882 if (unlikely(afinfo->family >= NPROTO))
883 return -EAFNOSUPPORT;
884 write_lock(&xfrm_state_afinfo_lock);
885 if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
888 afinfo->state_bydst = xfrm_state_bydst;
889 afinfo->state_byspi = xfrm_state_byspi;
890 xfrm_state_afinfo[afinfo->family] = afinfo;
892 write_unlock(&xfrm_state_afinfo_lock);
/* Unregister per-family state operations. Only the currently
 * registered afinfo for that family may unregister itself; on
 * success the slot and the afinfo's table pointers are cleared. */
896 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
899 if (unlikely(afinfo == NULL))
901 if (unlikely(afinfo->family >= NPROTO))
902 return -EAFNOSUPPORT;
903 write_lock(&xfrm_state_afinfo_lock);
904 if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
905 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
908 xfrm_state_afinfo[afinfo->family] = NULL;
909 afinfo->state_byspi = NULL;
910 afinfo->state_bydst = NULL;
913 write_unlock(&xfrm_state_afinfo_lock);
/* Look up the afinfo for a family and return it with its own lock
 * read-held (released by xfrm_state_put_afinfo), so the entry cannot
 * be torn down while in use. Returns NULL for unknown/unregistered
 * families (NULL returns are in elided lines). */
917 struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
919 struct xfrm_state_afinfo *afinfo;
920 if (unlikely(family >= NPROTO))
922 read_lock(&xfrm_state_afinfo_lock);
923 afinfo = xfrm_state_afinfo[family];
/* Take the per-afinfo lock before dropping the array lock. */
924 if (likely(afinfo != NULL))
925 read_lock(&afinfo->lock);
926 read_unlock(&xfrm_state_afinfo_lock);
/* Release the per-afinfo read lock taken by xfrm_state_get_afinfo.
 * NULL-safe, matching get's NULL return. */
930 void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
932 if (unlikely(afinfo == NULL))
934 read_unlock(&afinfo->lock);
937 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
/* Drop this state's hold on its tunnel state: when we are the last
 * outside user (tunnel_users == 2: the tunnel's own ref plus ours)
 * the tunnel state is deleted, then our usage count is released. */
938 void xfrm_state_delete_tunnel(struct xfrm_state *x)
941 struct xfrm_state *t = x->tunnel;
943 if (atomic_read(&t->tunnel_users) == 2)
944 xfrm_state_delete(t);
945 atomic_dec(&t->tunnel_users);
/* Boot-time initialisation: empty every hash chain in both tables
 * and set up the garbage-collection work item. */
951 void __init xfrm_state_init(void)
955 for (i=0; i<XFRM_DST_HSIZE; i++) {
956 INIT_LIST_HEAD(&xfrm_state_bydst[i]);
957 INIT_LIST_HEAD(&xfrm_state_byspi[i]);
959 INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);