upgrade to linux 2.6.10-1.12_FC2
[linux-2.6.git] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *      
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <asm/uaccess.h>
21
22 /* Each xfrm_state may be linked to two tables:
23
24    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
25    2. Hash table by daddr to find what SAs exist for given
26       destination/tunnel endpoint. (output)
27  */
28
29 static spinlock_t xfrm_state_lock = SPIN_LOCK_UNLOCKED;
30
31 /* Hash table to find appropriate SA towards given target (endpoint
32  * of tunnel or destination of transport mode) allowed by selector.
33  *
34  * Main use is finding SA after policy selected tunnel or transport mode.
35  * Also, it can be used by ah/esp icmp error handler to find offending SA.
36  */
37 static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
38 static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
39
40 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
41
42 static rwlock_t xfrm_state_afinfo_lock = RW_LOCK_UNLOCKED;
43 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
44
45 static struct work_struct xfrm_state_gc_work;
46 static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
47 static spinlock_t xfrm_state_gc_lock = SPIN_LOCK_UNLOCKED;
48
49 static void __xfrm_state_delete(struct xfrm_state *x);
50
51 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
52 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
53
54 static void xfrm_state_gc_destroy(struct xfrm_state *x)
55 {
56         if (del_timer(&x->timer))
57                 BUG();
58         if (x->aalg)
59                 kfree(x->aalg);
60         if (x->ealg)
61                 kfree(x->ealg);
62         if (x->calg)
63                 kfree(x->calg);
64         if (x->encap)
65                 kfree(x->encap);
66         if (x->type) {
67                 x->type->destructor(x);
68                 xfrm_put_type(x->type);
69         }
70         kfree(x);
71 }
72
73 static void xfrm_state_gc_task(void *data)
74 {
75         struct xfrm_state *x;
76         struct list_head *entry, *tmp;
77         struct list_head gc_list = LIST_HEAD_INIT(gc_list);
78
79         spin_lock_bh(&xfrm_state_gc_lock);
80         list_splice_init(&xfrm_state_gc_list, &gc_list);
81         spin_unlock_bh(&xfrm_state_gc_lock);
82
83         list_for_each_safe(entry, tmp, &gc_list) {
84                 x = list_entry(entry, struct xfrm_state, bydst);
85                 xfrm_state_gc_destroy(x);
86         }
87         wake_up(&km_waitq);
88 }
89
90 static inline unsigned long make_jiffies(long secs)
91 {
92         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
93                 return MAX_SCHEDULE_TIMEOUT-1;
94         else
95                 return secs*HZ;
96 }
97
/*
 * Lifetime timer callback for one xfrm_state.
 *
 * @data is the struct xfrm_state pointer stored in x->timer.data by
 * xfrm_state_alloc().  Under x->lock the handler compares the configured
 * hard/soft add- and use-time limits with the current time (xtime.tv_sec):
 *
 *   - a hard limit that has elapsed sends the state down the "expired"
 *     path, where it is deleted (or, for an ACQ state without an SPI,
 *     marked EXPIRED and given a 2 second grace timer);
 *   - a soft limit that has elapsed makes the handler call
 *     km_state_expired(x, 0);
 *   - otherwise the timer is re-armed for the nearest remaining limit.
 *
 * The handler always finishes with xfrm_state_put(x); presumably this
 * drops the reference that was taken when the timer was armed -- confirm
 * against the callers that arm the timer.
 */
98 static void xfrm_timer_handler(unsigned long data)
99 {
100         struct xfrm_state *x = (struct xfrm_state*)data;
101         unsigned long now = (unsigned long)xtime.tv_sec;
            /* Seconds until the nearest limit; LONG_MAX means "none". */
102         long next = LONG_MAX;
103         int warn = 0;
104
105         spin_lock(&x->lock);
            /* Already deleted: nothing to do except drop the reference. */
106         if (x->km.state == XFRM_STATE_DEAD)
107                 goto out;
108         if (x->km.state == XFRM_STATE_EXPIRED)
109                 goto expired;
            /* Hard limit measured from the time the state was added. */
110         if (x->lft.hard_add_expires_seconds) {
111                 long tmo = x->lft.hard_add_expires_seconds +
112                         x->curlft.add_time - now;
113                 if (tmo <= 0)
114                         goto expired;
115                 if (tmo < next)
116                         next = tmo;
117         }
            /* Hard limit measured from first use; while use_time is still
             * zero the GNU "?:" substitutes now, so the full period remains. */
118         if (x->lft.hard_use_expires_seconds) {
119                 long tmo = x->lft.hard_use_expires_seconds +
120                         (x->curlft.use_time ? : now) - now;
121                 if (tmo <= 0)
122                         goto expired;
123                 if (tmo < next)
124                         next = tmo;
125         }
            /* km.dying is already set, so the soft limits are not
             * evaluated again. */
126         if (x->km.dying)
127                 goto resched;
128         if (x->lft.soft_add_expires_seconds) {
129                 long tmo = x->lft.soft_add_expires_seconds +
130                         x->curlft.add_time - now;
131                 if (tmo <= 0)
132                         warn = 1;
133                 else if (tmo < next)
134                         next = tmo;
135         }
136         if (x->lft.soft_use_expires_seconds) {
137                 long tmo = x->lft.soft_use_expires_seconds +
138                         (x->curlft.use_time ? : now) - now;
139                 if (tmo <= 0)
140                         warn = 1;
141                 else if (tmo < next)
142                         next = tmo;
143         }
144
            /* Soft expiry: km_state_expired() sets km.dying and notifies
             * the registered key managers. */
145         if (warn)
146                 km_state_expired(x, 0);
147 resched:
            /* Re-arm for the nearest limit.  A reference is taken when
             * mod_timer() returns 0 -- presumably meaning the timer was not
             * pending before; confirm against the timer API. */
148         if (next != LONG_MAX &&
149             !mod_timer(&x->timer, jiffies + make_jiffies(next)))
150                 xfrm_state_hold(x);
151         goto out;
152
153 expired:
            /* An ACQ state that has no SPI is not deleted right away: it is
             * marked EXPIRED, waiters are woken, and the timer is re-armed
             * for 2 seconds (the next run takes the EXPIRED branch above). */
154         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
155                 x->km.state = XFRM_STATE_EXPIRED;
156                 wake_up(&km_waitq);
157                 next = 2;
158                 goto resched;
159         }
            /* Only states that own an SPI generate a hard-expire notification. */
160         if (x->id.spi != 0)
161                 km_state_expired(x, 1);
162         __xfrm_state_delete(x);
163
164 out:
165         spin_unlock(&x->lock);
166         xfrm_state_put(x);
167 }
168
169 struct xfrm_state *xfrm_state_alloc(void)
170 {
171         struct xfrm_state *x;
172
173         x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
174
175         if (x) {
176                 memset(x, 0, sizeof(struct xfrm_state));
177                 atomic_set(&x->refcnt, 1);
178                 atomic_set(&x->tunnel_users, 0);
179                 INIT_LIST_HEAD(&x->bydst);
180                 INIT_LIST_HEAD(&x->byspi);
181                 init_timer(&x->timer);
182                 x->timer.function = xfrm_timer_handler;
183                 x->timer.data     = (unsigned long)x;
184                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
185                 x->lft.soft_byte_limit = XFRM_INF;
186                 x->lft.soft_packet_limit = XFRM_INF;
187                 x->lft.hard_byte_limit = XFRM_INF;
188                 x->lft.hard_packet_limit = XFRM_INF;
189                 spin_lock_init(&x->lock);
190         }
191         return x;
192 }
193
194 void __xfrm_state_destroy(struct xfrm_state *x)
195 {
196         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
197
198         spin_lock_bh(&xfrm_state_gc_lock);
199         list_add(&x->bydst, &xfrm_state_gc_list);
200         spin_unlock_bh(&xfrm_state_gc_lock);
201         schedule_work(&xfrm_state_gc_work);
202 }
203
204 static void __xfrm_state_delete(struct xfrm_state *x)
205 {
206         if (x->km.state != XFRM_STATE_DEAD) {
207                 x->km.state = XFRM_STATE_DEAD;
208                 spin_lock(&xfrm_state_lock);
209                 list_del(&x->bydst);
210                 atomic_dec(&x->refcnt);
211                 if (x->id.spi) {
212                         list_del(&x->byspi);
213                         atomic_dec(&x->refcnt);
214                 }
215                 spin_unlock(&xfrm_state_lock);
216                 if (del_timer(&x->timer))
217                         atomic_dec(&x->refcnt);
218
219                 /* The number two in this test is the reference
220                  * mentioned in the comment below plus the reference
221                  * our caller holds.  A larger value means that
222                  * there are DSTs attached to this xfrm_state.
223                  */
224                 if (atomic_read(&x->refcnt) > 2)
225                         xfrm_flush_bundles();
226
227                 /* All xfrm_state objects are created by xfrm_state_alloc.
228                  * The xfrm_state_alloc call gives a reference, and that
229                  * is what we are dropping here.
230                  */
231                 atomic_dec(&x->refcnt);
232         }
233 }
234
235 void xfrm_state_delete(struct xfrm_state *x)
236 {
237         spin_lock_bh(&x->lock);
238         __xfrm_state_delete(x);
239         spin_unlock_bh(&x->lock);
240 }
241
242 void xfrm_state_flush(u8 proto)
243 {
244         int i;
245         struct xfrm_state *x;
246
247         spin_lock_bh(&xfrm_state_lock);
248         for (i = 0; i < XFRM_DST_HSIZE; i++) {
249 restart:
250                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
251                         if (!xfrm_state_kern(x) &&
252                             (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
253                                 xfrm_state_hold(x);
254                                 spin_unlock_bh(&xfrm_state_lock);
255
256                                 xfrm_state_delete(x);
257                                 xfrm_state_put(x);
258
259                                 spin_lock_bh(&xfrm_state_lock);
260                                 goto restart;
261                         }
262                 }
263         }
264         spin_unlock_bh(&xfrm_state_lock);
265         wake_up(&km_waitq);
266 }
267
268 static int
269 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
270                   struct xfrm_tmpl *tmpl,
271                   xfrm_address_t *daddr, xfrm_address_t *saddr,
272                   unsigned short family)
273 {
274         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
275         if (!afinfo)
276                 return -1;
277         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
278         xfrm_state_put_afinfo(afinfo);
279         return 0;
280 }
281
/*
 * Find (or start acquiring) a state suitable for template @tmpl and
 * flow @fl towards @daddr.
 *
 * The bydst bucket for (@daddr, @family) is scanned under
 * xfrm_state_lock.  Among VALID states whose selector matches @fl the
 * preferred one is a state that is not dying and, on a tie, the one
 * with the later add_time.  If no state is usable, none is in ACQ state
 * and none is in ERROR/EXPIRED state with a matching selector, a new
 * state is allocated, given a temporary selector and passed to the key
 * managers with km_query(); on success it is hashed in ACQ state with a
 * lifetime of XFRM_ACQ_EXPIRES seconds.
 *
 * Returns the state with an additional reference held for the caller,
 * or NULL with *@err set to -EAGAIN (an acquire is already in
 * progress), -ESRCH (error/expired state matched, or km_query() failed)
 * or -ENOMEM (allocation failed).  *@err is not written on success.
 */
282 struct xfrm_state *
283 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
284                 struct flowi *fl, struct xfrm_tmpl *tmpl,
285                 struct xfrm_policy *pol, int *err,
286                 unsigned short family)
287 {
288         unsigned h = xfrm_dst_hash(daddr, family);
289         struct xfrm_state *x;
290         int acquire_in_progress = 0;
291         int error = 0;
292         struct xfrm_state *best = NULL;
293
294         spin_lock_bh(&xfrm_state_lock);
295         list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
296                 if (x->props.family == family &&
297                     x->props.reqid == tmpl->reqid &&
298                     xfrm_state_addr_check(x, daddr, saddr, family) &&
299                     tmpl->mode == x->props.mode &&
300                     tmpl->id.proto == x->id.proto) {
301                         /* Resolution logic:
302                            1. There is a valid state with matching selector.
303                               Done.
304                            2. Valid state with inappropriate selector. Skip.
305
306                            Entering area of "sysdeps".
307
308                            3. If state is not valid, selector is temporary,
309                               it selects only session which triggered
310                               previous resolution. Key manager will do
311                               something to install a state with proper
312                               selector.
313                          */
314                         if (x->km.state == XFRM_STATE_VALID) {
315                                 if (!xfrm_selector_match(&x->sel, fl, family))
316                                         continue;
                                    /* Prefer a non-dying state; on a tie prefer
                                     * the more recently added one. */
317                                 if (!best ||
318                                     best->km.dying > x->km.dying ||
319                                     (best->km.dying == x->km.dying &&
320                                      best->curlft.add_time < x->curlft.add_time))
321                                         best = x;
322                         } else if (x->km.state == XFRM_STATE_ACQ) {
323                                 acquire_in_progress = 1;
324                         } else if (x->km.state == XFRM_STATE_ERROR ||
325                                    x->km.state == XFRM_STATE_EXPIRED) {
326                                 if (xfrm_selector_match(&x->sel, fl, family))
327                                         error = 1;
328                         }
329                 }
330         }
331
332         x = best;
            /* Nothing usable and nothing pending: try to start an acquire. */
333         if (!x && !error && !acquire_in_progress &&
334             ((x = xfrm_state_alloc()) != NULL)) {
335                 /* Initialize temporary selector matching only
336                  * to current session. */
                    /* NOTE(review): the return value (-1 when the family has
                     * no afinfo registered) is ignored here. */
337                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
338
339                 if (km_query(x, tmpl, pol) == 0) {
340                         x->km.state = XFRM_STATE_ACQ;
                            /* One reference per hash list the state is put on. */
341                         list_add_tail(&x->bydst, xfrm_state_bydst+h);
342                         xfrm_state_hold(x);
343                         if (x->id.spi) {
344                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
345                                 list_add(&x->byspi, xfrm_state_byspi+h);
346                                 xfrm_state_hold(x);
347                         }
                            /* Another reference is taken before the timer is armed. */
348                         x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
349                         xfrm_state_hold(x);
350                         x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
351                         add_timer(&x->timer);
352                 } else {
                            /* No key manager accepted the request: discard
                             * the freshly allocated state. */
353                         x->km.state = XFRM_STATE_DEAD;
354                         xfrm_state_put(x);
355                         x = NULL;
356                         error = 1;
357                 }
358         }
            /* The caller receives its own reference on success. */
359         if (x)
360                 xfrm_state_hold(x);
361         else
362                 *err = acquire_in_progress ? -EAGAIN :
363                         (error ? -ESRCH : -ENOMEM);
364         spin_unlock_bh(&xfrm_state_lock);
365         return x;
366 }
367
368 static void __xfrm_state_insert(struct xfrm_state *x)
369 {
370         unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
371
372         list_add(&x->bydst, xfrm_state_bydst+h);
373         xfrm_state_hold(x);
374
375         h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
376
377         list_add(&x->byspi, xfrm_state_byspi+h);
378         xfrm_state_hold(x);
379
380         if (!mod_timer(&x->timer, jiffies + HZ))
381                 xfrm_state_hold(x);
382
383         wake_up(&km_waitq);
384 }
385
386 void xfrm_state_insert(struct xfrm_state *x)
387 {
388         spin_lock_bh(&xfrm_state_lock);
389         __xfrm_state_insert(x);
390         spin_unlock_bh(&xfrm_state_lock);
391 }
392
393 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
394
395 int xfrm_state_add(struct xfrm_state *x)
396 {
397         struct xfrm_state_afinfo *afinfo;
398         struct xfrm_state *x1;
399         int family;
400         int err;
401
402         family = x->props.family;
403         afinfo = xfrm_state_get_afinfo(family);
404         if (unlikely(afinfo == NULL))
405                 return -EAFNOSUPPORT;
406
407         spin_lock_bh(&xfrm_state_lock);
408
409         x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
410         if (x1) {
411                 xfrm_state_put(x1);
412                 x1 = NULL;
413                 err = -EEXIST;
414                 goto out;
415         }
416
417         if (x->km.seq) {
418                 x1 = __xfrm_find_acq_byseq(x->km.seq);
419                 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
420                         xfrm_state_put(x1);
421                         x1 = NULL;
422                 }
423         }
424
425         if (!x1)
426                 x1 = afinfo->find_acq(
427                         x->props.mode, x->props.reqid, x->id.proto,
428                         &x->id.daddr, &x->props.saddr, 0);
429
430         __xfrm_state_insert(x);
431         err = 0;
432
433 out:
434         spin_unlock_bh(&xfrm_state_lock);
435         xfrm_state_put_afinfo(afinfo);
436
437         if (x1) {
438                 xfrm_state_delete(x1);
439                 xfrm_state_put(x1);
440         }
441
442         return err;
443 }
444
445 int xfrm_state_update(struct xfrm_state *x)
446 {
447         struct xfrm_state_afinfo *afinfo;
448         struct xfrm_state *x1;
449         int err;
450
451         afinfo = xfrm_state_get_afinfo(x->props.family);
452         if (unlikely(afinfo == NULL))
453                 return -EAFNOSUPPORT;
454
455         spin_lock_bh(&xfrm_state_lock);
456         x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
457
458         err = -ESRCH;
459         if (!x1)
460                 goto out;
461
462         if (xfrm_state_kern(x1)) {
463                 xfrm_state_put(x1);
464                 err = -EEXIST;
465                 goto out;
466         }
467
468         if (x1->km.state == XFRM_STATE_ACQ) {
469                 __xfrm_state_insert(x);
470                 x = NULL;
471         }
472         err = 0;
473
474 out:
475         spin_unlock_bh(&xfrm_state_lock);
476         xfrm_state_put_afinfo(afinfo);
477
478         if (err)
479                 return err;
480
481         if (!x) {
482                 xfrm_state_delete(x1);
483                 xfrm_state_put(x1);
484                 return 0;
485         }
486
487         err = -EINVAL;
488         spin_lock_bh(&x1->lock);
489         if (likely(x1->km.state == XFRM_STATE_VALID)) {
490                 if (x->encap && x1->encap)
491                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
492                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
493                 x1->km.dying = 0;
494
495                 if (!mod_timer(&x1->timer, jiffies + HZ))
496                         xfrm_state_hold(x1);
497                 if (x1->curlft.use_time)
498                         xfrm_state_check_expire(x1);
499
500                 err = 0;
501         }
502         spin_unlock_bh(&x1->lock);
503
504         xfrm_state_put(x1);
505
506         return err;
507 }
508
509 int xfrm_state_check_expire(struct xfrm_state *x)
510 {
511         if (!x->curlft.use_time)
512                 x->curlft.use_time = (unsigned long)xtime.tv_sec;
513
514         if (x->km.state != XFRM_STATE_VALID)
515                 return -EINVAL;
516
517         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
518             x->curlft.packets >= x->lft.hard_packet_limit) {
519                 km_state_expired(x, 1);
520                 if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ))
521                         xfrm_state_hold(x);
522                 return -EINVAL;
523         }
524
525         if (!x->km.dying &&
526             (x->curlft.bytes >= x->lft.soft_byte_limit ||
527              x->curlft.packets >= x->lft.soft_packet_limit))
528                 km_state_expired(x, 0);
529         return 0;
530 }
531
532 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
533 {
534         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
535                 - skb_headroom(skb);
536
537         if (nhead > 0)
538                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
539
540         /* Check tail too... */
541         return 0;
542 }
543
/*
 * Combined pre-transmit check: lifetime limits first, then headroom.
 *
 * Returns the negative result of xfrm_state_check_expire() if that
 * fails, otherwise the result of xfrm_state_check_space().
 */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int err = xfrm_state_check_expire(x);

	if (err >= 0)
		err = xfrm_state_check_space(x, skb);

	return err;
}
553
554 struct xfrm_state *
555 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
556                   unsigned short family)
557 {
558         struct xfrm_state *x;
559         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
560         if (!afinfo)
561                 return NULL;
562
563         spin_lock_bh(&xfrm_state_lock);
564         x = afinfo->state_lookup(daddr, spi, proto);
565         spin_unlock_bh(&xfrm_state_lock);
566         xfrm_state_put_afinfo(afinfo);
567         return x;
568 }
569
570 struct xfrm_state *
571 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
572               xfrm_address_t *daddr, xfrm_address_t *saddr, 
573               int create, unsigned short family)
574 {
575         struct xfrm_state *x;
576         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
577         if (!afinfo)
578                 return NULL;
579
580         spin_lock_bh(&xfrm_state_lock);
581         x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
582         spin_unlock_bh(&xfrm_state_lock);
583         xfrm_state_put_afinfo(afinfo);
584         return x;
585 }
586
587 /* Silly enough, but I'm lazy to build resolution list */
588
589 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
590 {
591         int i;
592         struct xfrm_state *x;
593
594         for (i = 0; i < XFRM_DST_HSIZE; i++) {
595                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
596                         if (x->km.seq == seq) {
597                                 xfrm_state_hold(x);
598                                 return x;
599                         }
600                 }
601         }
602         return NULL;
603 }
604
605 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
606 {
607         struct xfrm_state *x;
608
609         spin_lock_bh(&xfrm_state_lock);
610         x = __xfrm_find_acq_byseq(seq);
611         spin_unlock_bh(&xfrm_state_lock);
612         return x;
613 }
614  
615 u32 xfrm_get_acqseq(void)
616 {
617         u32 res;
618         static u32 acqseq;
619         static spinlock_t acqseq_lock = SPIN_LOCK_UNLOCKED;
620
621         spin_lock_bh(&acqseq_lock);
622         res = (++acqseq ? : ++acqseq);
623         spin_unlock_bh(&acqseq_lock);
624         return res;
625 }
626
627 void
628 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
629 {
630         u32 h;
631         struct xfrm_state *x0;
632
633         if (x->id.spi)
634                 return;
635
636         if (minspi == maxspi) {
637                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
638                 if (x0) {
639                         xfrm_state_put(x0);
640                         return;
641                 }
642                 x->id.spi = minspi;
643         } else {
644                 u32 spi = 0;
645                 minspi = ntohl(minspi);
646                 maxspi = ntohl(maxspi);
647                 for (h=0; h<maxspi-minspi+1; h++) {
648                         spi = minspi + net_random()%(maxspi-minspi+1);
649                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
650                         if (x0 == NULL) {
651                                 x->id.spi = htonl(spi);
652                                 break;
653                         }
654                         xfrm_state_put(x0);
655                 }
656         }
657         if (x->id.spi) {
658                 spin_lock_bh(&xfrm_state_lock);
659                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
660                 list_add(&x->byspi, xfrm_state_byspi+h);
661                 xfrm_state_hold(x);
662                 spin_unlock_bh(&xfrm_state_lock);
663                 wake_up(&km_waitq);
664         }
665 }
666
667 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
668                     void *data)
669 {
670         int i;
671         struct xfrm_state *x;
672         int count = 0;
673         int err = 0;
674
675         spin_lock_bh(&xfrm_state_lock);
676         for (i = 0; i < XFRM_DST_HSIZE; i++) {
677                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
678                         if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
679                                 count++;
680                 }
681         }
682         if (count == 0) {
683                 err = -ENOENT;
684                 goto out;
685         }
686
687         for (i = 0; i < XFRM_DST_HSIZE; i++) {
688                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
689                         if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
690                                 continue;
691                         err = func(x, --count, data);
692                         if (err)
693                                 goto out;
694                 }
695         }
696 out:
697         spin_unlock_bh(&xfrm_state_lock);
698         return err;
699 }
700
701
702 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
703 {
704         u32 diff;
705
706         seq = ntohl(seq);
707
708         if (unlikely(seq == 0))
709                 return -EINVAL;
710
711         if (likely(seq > x->replay.seq))
712                 return 0;
713
714         diff = x->replay.seq - seq;
715         if (diff >= x->props.replay_window) {
716                 x->stats.replay_window++;
717                 return -EINVAL;
718         }
719
720         if (x->replay.bitmap & (1U << diff)) {
721                 x->stats.replay++;
722                 return -EINVAL;
723         }
724         return 0;
725 }
726
727 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
728 {
729         u32 diff;
730
731         seq = ntohl(seq);
732
733         if (seq > x->replay.seq) {
734                 diff = seq - x->replay.seq;
735                 if (diff < x->props.replay_window)
736                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
737                 else
738                         x->replay.bitmap = 1;
739                 x->replay.seq = seq;
740         } else {
741                 diff = x->replay.seq - seq;
742                 x->replay.bitmap |= (1U << diff);
743         }
744 }
745
746 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
747 static rwlock_t         xfrm_km_lock = RW_LOCK_UNLOCKED;
748
749 void km_state_expired(struct xfrm_state *x, int hard)
750 {
751         struct xfrm_mgr *km;
752
753         if (hard)
754                 x->km.state = XFRM_STATE_EXPIRED;
755         else
756                 x->km.dying = 1;
757
758         read_lock(&xfrm_km_lock);
759         list_for_each_entry(km, &xfrm_km_list, list)
760                 km->notify(x, hard);
761         read_unlock(&xfrm_km_lock);
762
763         if (hard)
764                 wake_up(&km_waitq);
765 }
766
767 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
768 {
769         int err = -EINVAL;
770         struct xfrm_mgr *km;
771
772         read_lock(&xfrm_km_lock);
773         list_for_each_entry(km, &xfrm_km_list, list) {
774                 err = km->acquire(x, t, pol, XFRM_POLICY_OUT);
775                 if (!err)
776                         break;
777         }
778         read_unlock(&xfrm_km_lock);
779         return err;
780 }
781
782 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
783 {
784         int err = -EINVAL;
785         struct xfrm_mgr *km;
786
787         read_lock(&xfrm_km_lock);
788         list_for_each_entry(km, &xfrm_km_list, list) {
789                 if (km->new_mapping)
790                         err = km->new_mapping(x, ipaddr, sport);
791                 if (!err)
792                         break;
793         }
794         read_unlock(&xfrm_km_lock);
795         return err;
796 }
797
798 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
799 {
800         struct xfrm_mgr *km;
801
802         read_lock(&xfrm_km_lock);
803         list_for_each_entry(km, &xfrm_km_list, list)
804                 if (km->notify_policy)
805                         km->notify_policy(pol, dir, hard);
806         read_unlock(&xfrm_km_lock);
807
808         if (hard)
809                 wake_up(&km_waitq);
810 }
811
812 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
813 {
814         int err;
815         u8 *data;
816         struct xfrm_mgr *km;
817         struct xfrm_policy *pol = NULL;
818
819         if (optlen <= 0 || optlen > PAGE_SIZE)
820                 return -EMSGSIZE;
821
822         data = kmalloc(optlen, GFP_KERNEL);
823         if (!data)
824                 return -ENOMEM;
825
826         err = -EFAULT;
827         if (copy_from_user(data, optval, optlen))
828                 goto out;
829
830         err = -EINVAL;
831         read_lock(&xfrm_km_lock);
832         list_for_each_entry(km, &xfrm_km_list, list) {
833                 pol = km->compile_policy(sk->sk_family, optname, data,
834                                          optlen, &err);
835                 if (err >= 0)
836                         break;
837         }
838         read_unlock(&xfrm_km_lock);
839
840         if (err >= 0) {
841                 xfrm_sk_policy_insert(sk, err, pol);
842                 xfrm_pol_put(pol);
843                 err = 0;
844         }
845
846 out:
847         kfree(data);
848         return err;
849 }
850
851 int xfrm_register_km(struct xfrm_mgr *km)
852 {
853         write_lock_bh(&xfrm_km_lock);
854         list_add_tail(&km->list, &xfrm_km_list);
855         write_unlock_bh(&xfrm_km_lock);
856         return 0;
857 }
858
859 int xfrm_unregister_km(struct xfrm_mgr *km)
860 {
861         write_lock_bh(&xfrm_km_lock);
862         list_del(&km->list);
863         write_unlock_bh(&xfrm_km_lock);
864         return 0;
865 }
866
867 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
868 {
869         int err = 0;
870         if (unlikely(afinfo == NULL))
871                 return -EINVAL;
872         if (unlikely(afinfo->family >= NPROTO))
873                 return -EAFNOSUPPORT;
874         write_lock(&xfrm_state_afinfo_lock);
875         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
876                 err = -ENOBUFS;
877         else {
878                 afinfo->state_bydst = xfrm_state_bydst;
879                 afinfo->state_byspi = xfrm_state_byspi;
880                 xfrm_state_afinfo[afinfo->family] = afinfo;
881         }
882         write_unlock(&xfrm_state_afinfo_lock);
883         return err;
884 }
885
886 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
887 {
888         int err = 0;
889         if (unlikely(afinfo == NULL))
890                 return -EINVAL;
891         if (unlikely(afinfo->family >= NPROTO))
892                 return -EAFNOSUPPORT;
893         write_lock(&xfrm_state_afinfo_lock);
894         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
895                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
896                         err = -EINVAL;
897                 else {
898                         xfrm_state_afinfo[afinfo->family] = NULL;
899                         afinfo->state_byspi = NULL;
900                         afinfo->state_bydst = NULL;
901                 }
902         }
903         write_unlock(&xfrm_state_afinfo_lock);
904         return err;
905 }
906
907 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
908 {
909         struct xfrm_state_afinfo *afinfo;
910         if (unlikely(family >= NPROTO))
911                 return NULL;
912         read_lock(&xfrm_state_afinfo_lock);
913         afinfo = xfrm_state_afinfo[family];
914         if (likely(afinfo != NULL))
915                 read_lock(&afinfo->lock);
916         read_unlock(&xfrm_state_afinfo_lock);
917         return afinfo;
918 }
919
920 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
921 {
922         if (unlikely(afinfo == NULL))
923                 return;
924         read_unlock(&afinfo->lock);
925 }
926
927 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
928 void xfrm_state_delete_tunnel(struct xfrm_state *x)
929 {
930         if (x->tunnel) {
931                 struct xfrm_state *t = x->tunnel;
932
933                 if (atomic_read(&t->tunnel_users) == 2)
934                         xfrm_state_delete(t);
935                 atomic_dec(&t->tunnel_users);
936                 xfrm_state_put(t);
937                 x->tunnel = NULL;
938         }
939 }
940
941 void __init xfrm_state_init(void)
942 {
943         int i;
944
945         for (i=0; i<XFRM_DST_HSIZE; i++) {
946                 INIT_LIST_HEAD(&xfrm_state_bydst[i]);
947                 INIT_LIST_HEAD(&xfrm_state_byspi[i]);
948         }
949         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
950 }
951