vserver 1.9.3
[linux-2.6.git] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *      
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <asm/uaccess.h>
21
22 /* Each xfrm_state may be linked to two tables:
23
24    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
25    2. Hash table by daddr to find what SAs exist for given
26       destination/tunnel endpoint. (output)
27  */
28
/* Taken (with bottom halves disabled in process context) around every
 * walk or modification of the xfrm_state_bydst / xfrm_state_byspi
 * hash chains in this file.
 */
29 static spinlock_t xfrm_state_lock = SPIN_LOCK_UNLOCKED;
30
31 /* Hash table to find appropriate SA towards given target (endpoint
32  * of tunnel or destination of transport mode) allowed by selector.
33  *
34  * Main use is finding SA after policy selected tunnel or transport mode.
35  * Also, it can be used by ah/esp icmp error handler to find offending SA.
36  */
37 static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
/* Second table, indexed by xfrm_spi_hash(daddr, spi, proto, family). */
38 static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
39
/* Wait queue woken whenever state is inserted, expired, flushed or
 * garbage-collected in this file.
 */
40 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
41
/* Per-address-family operations, indexed by family; the rwlock guards
 * the table slots themselves.
 */
42 static rwlock_t xfrm_state_afinfo_lock = RW_LOCK_UNLOCKED;
43 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
44
/* Deferred destruction: __xfrm_state_destroy() queues dead states on
 * xfrm_state_gc_list (under xfrm_state_gc_lock) and schedules
 * xfrm_state_gc_work, whose handler frees them.
 */
45 static struct work_struct xfrm_state_gc_work;
46 static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
47 static spinlock_t xfrm_state_gc_lock = SPIN_LOCK_UNLOCKED;
48
/* Forward declaration: used by the timer handler before its definition. */
49 static void __xfrm_state_delete(struct xfrm_state *x);
50
51 static void xfrm_state_gc_destroy(struct xfrm_state *x)
52 {
53         if (del_timer(&x->timer))
54                 BUG();
55         if (x->aalg)
56                 kfree(x->aalg);
57         if (x->ealg)
58                 kfree(x->ealg);
59         if (x->calg)
60                 kfree(x->calg);
61         if (x->encap)
62                 kfree(x->encap);
63         if (x->type) {
64                 x->type->destructor(x);
65                 xfrm_put_type(x->type);
66         }
67         kfree(x);
68 }
69
70 static void xfrm_state_gc_task(void *data)
71 {
72         struct xfrm_state *x;
73         struct list_head *entry, *tmp;
74         struct list_head gc_list = LIST_HEAD_INIT(gc_list);
75
76         spin_lock_bh(&xfrm_state_gc_lock);
77         list_splice_init(&xfrm_state_gc_list, &gc_list);
78         spin_unlock_bh(&xfrm_state_gc_lock);
79
80         list_for_each_safe(entry, tmp, &gc_list) {
81                 x = list_entry(entry, struct xfrm_state, bydst);
82                 xfrm_state_gc_destroy(x);
83         }
84         wake_up(&km_waitq);
85 }
86
87 static inline unsigned long make_jiffies(long secs)
88 {
89         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
90                 return MAX_SCHEDULE_TIMEOUT-1;
91         else
92                 return secs*HZ;
93 }
94
/*
 * Per-state timer callback; @data is the struct xfrm_state pointer that
 * xfrm_state_alloc() stored in timer.data.
 *
 * Under x->lock it evaluates the hard and soft add/use lifetimes against
 * the current wall-clock second, then either re-arms the timer for the
 * nearest upcoming deadline, signals a soft expiry to the key managers,
 * or hard-expires and deletes the state.  The xfrm_state_put() at the end
 * balances the reference that is taken whenever the timer is armed from
 * the inactive state.
 */
95 static void xfrm_timer_handler(unsigned long data)
96 {
97         struct xfrm_state *x = (struct xfrm_state*)data;
98         unsigned long now = (unsigned long)xtime.tv_sec;
           /* Seconds until the nearest deadline; LONG_MAX means "none". */
99         long next = LONG_MAX;
100         int warn = 0;
101
102         spin_lock(&x->lock);
            /* Already deleted: nothing to do except drop the timer's reference. */
103         if (x->km.state == XFRM_STATE_DEAD)
104                 goto out;
            /* Second firing after a hard expiry was flagged: finish the teardown. */
105         if (x->km.state == XFRM_STATE_EXPIRED)
106                 goto expired;
            /* Hard limits: a zero setting means the limit is disabled. */
107         if (x->lft.hard_add_expires_seconds) {
108                 long tmo = x->lft.hard_add_expires_seconds +
109                         x->curlft.add_time - now;
110                 if (tmo <= 0)
111                         goto expired;
112                 if (tmo < next)
113                         next = tmo;
114         }
115         if (x->lft.hard_use_expires_seconds) {
                    /* A state that has never been used counts from "now". */
116                 long tmo = x->lft.hard_use_expires_seconds +
117                         (x->curlft.use_time ? : now) - now;
118                 if (tmo <= 0)
119                         goto expired;
120                 if (tmo < next)
121                         next = tmo;
122         }
            /* Soft expiry already reported: only the hard deadlines matter now. */
123         if (x->km.dying)
124                 goto resched;
125         if (x->lft.soft_add_expires_seconds) {
126                 long tmo = x->lft.soft_add_expires_seconds +
127                         x->curlft.add_time - now;
128                 if (tmo <= 0)
129                         warn = 1;
130                 else if (tmo < next)
131                         next = tmo;
132         }
133         if (x->lft.soft_use_expires_seconds) {
134                 long tmo = x->lft.soft_use_expires_seconds +
135                         (x->curlft.use_time ? : now) - now;
136                 if (tmo <= 0)
137                         warn = 1;
138                 else if (tmo < next)
139                         next = tmo;
140         }
141
            /* Soft limit reached: notify key managers (this sets km.dying). */
142         if (warn)
143                 km_state_expired(x, 0);
144 resched:
            /* mod_timer() returning 0 means the timer was inactive, so the
             * newly armed timer needs its own reference on the state.
             */
145         if (next != LONG_MAX &&
146             !mod_timer(&x->timer, jiffies + make_jiffies(next)))
147                 xfrm_state_hold(x);
148         goto out;
149
150 expired:
            /* An acquire placeholder that never received an SPI is first marked
             * EXPIRED and the timer re-armed 2 seconds out; the next firing
             * takes the EXPIRED branch above and deletes it.
             */
151         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
152                 x->km.state = XFRM_STATE_EXPIRED;
153                 wake_up(&km_waitq);
154                 next = 2;
155                 goto resched;
156         }
            /* Only states that have an SPI are reported as hard-expired. */
157         if (x->id.spi != 0)
158                 km_state_expired(x, 1);
159         __xfrm_state_delete(x);
160
161 out:
162         spin_unlock(&x->lock);
163         xfrm_state_put(x);
164 }
165
166 struct xfrm_state *xfrm_state_alloc(void)
167 {
168         struct xfrm_state *x;
169
170         x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
171
172         if (x) {
173                 memset(x, 0, sizeof(struct xfrm_state));
174                 atomic_set(&x->refcnt, 1);
175                 atomic_set(&x->tunnel_users, 0);
176                 INIT_LIST_HEAD(&x->bydst);
177                 INIT_LIST_HEAD(&x->byspi);
178                 init_timer(&x->timer);
179                 x->timer.function = xfrm_timer_handler;
180                 x->timer.data     = (unsigned long)x;
181                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
182                 x->lft.soft_byte_limit = XFRM_INF;
183                 x->lft.soft_packet_limit = XFRM_INF;
184                 x->lft.hard_byte_limit = XFRM_INF;
185                 x->lft.hard_packet_limit = XFRM_INF;
186                 x->lock = SPIN_LOCK_UNLOCKED;
187         }
188         return x;
189 }
190
191 void __xfrm_state_destroy(struct xfrm_state *x)
192 {
193         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
194
195         spin_lock_bh(&xfrm_state_gc_lock);
196         list_add(&x->bydst, &xfrm_state_gc_list);
197         spin_unlock_bh(&xfrm_state_gc_lock);
198         schedule_work(&xfrm_state_gc_work);
199 }
200
201 static void __xfrm_state_delete(struct xfrm_state *x)
202 {
203         if (x->km.state != XFRM_STATE_DEAD) {
204                 x->km.state = XFRM_STATE_DEAD;
205                 spin_lock(&xfrm_state_lock);
206                 list_del(&x->bydst);
207                 atomic_dec(&x->refcnt);
208                 if (x->id.spi) {
209                         list_del(&x->byspi);
210                         atomic_dec(&x->refcnt);
211                 }
212                 spin_unlock(&xfrm_state_lock);
213                 if (del_timer(&x->timer))
214                         atomic_dec(&x->refcnt);
215
216                 /* The number two in this test is the reference
217                  * mentioned in the comment below plus the reference
218                  * our caller holds.  A larger value means that
219                  * there are DSTs attached to this xfrm_state.
220                  */
221                 if (atomic_read(&x->refcnt) > 2)
222                         xfrm_flush_bundles();
223
224                 /* All xfrm_state objects are created by xfrm_state_alloc.
225                  * The xfrm_state_alloc call gives a reference, and that
226                  * is what we are dropping here.
227                  */
228                 atomic_dec(&x->refcnt);
229         }
230 }
231
232 void xfrm_state_delete(struct xfrm_state *x)
233 {
234         spin_lock_bh(&x->lock);
235         __xfrm_state_delete(x);
236         spin_unlock_bh(&x->lock);
237 }
238
239 void xfrm_state_flush(u8 proto)
240 {
241         int i;
242         struct xfrm_state *x;
243
244         spin_lock_bh(&xfrm_state_lock);
245         for (i = 0; i < XFRM_DST_HSIZE; i++) {
246 restart:
247                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
248                         if (!xfrm_state_kern(x) &&
249                             (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
250                                 xfrm_state_hold(x);
251                                 spin_unlock_bh(&xfrm_state_lock);
252
253                                 xfrm_state_delete(x);
254                                 xfrm_state_put(x);
255
256                                 spin_lock_bh(&xfrm_state_lock);
257                                 goto restart;
258                         }
259                 }
260         }
261         spin_unlock_bh(&xfrm_state_lock);
262         wake_up(&km_waitq);
263 }
264
265 static int
266 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
267                   struct xfrm_tmpl *tmpl,
268                   xfrm_address_t *daddr, xfrm_address_t *saddr,
269                   unsigned short family)
270 {
271         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
272         if (!afinfo)
273                 return -1;
274         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
275         xfrm_state_put_afinfo(afinfo);
276         return 0;
277 }
278
/*
 * Find the state to use for flow @fl towards @daddr as required by
 * template @tmpl, or start an acquire for one.
 *
 * The bydst bucket for @daddr is scanned under xfrm_state_lock.  Among
 * VALID states whose selector matches the flow, one that is not dying is
 * preferred, and among equals the most recently added one.  If nothing
 * usable exists, no matching state is in ERROR/EXPIRED and no acquire is
 * already pending, a temporary state is allocated and the key managers
 * are asked (km_query) to negotiate a real one.
 *
 * Returns the state with an extra reference held for the caller, or NULL
 * with *err set to -EAGAIN (an acquire is in progress), -ESRCH (error)
 * or -ENOMEM.  *err is not written when a state is returned.
 */
279 struct xfrm_state *
280 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
281                 struct flowi *fl, struct xfrm_tmpl *tmpl,
282                 struct xfrm_policy *pol, int *err,
283                 unsigned short family)
284 {
285         unsigned h = xfrm_dst_hash(daddr, family);
286         struct xfrm_state *x;
287         int acquire_in_progress = 0;
288         int error = 0;
289         struct xfrm_state *best = NULL;
290
291         spin_lock_bh(&xfrm_state_lock);
292         list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
293                 if (x->props.family == family &&
294                     x->props.reqid == tmpl->reqid &&
295                     xfrm_state_addr_check(x, daddr, saddr, family) &&
296                     tmpl->mode == x->props.mode &&
297                     tmpl->id.proto == x->id.proto) {
298                         /* Resolution logic:
299                            1. There is a valid state with matching selector.
300                               Done.
301                            2. Valid state with inappropriate selector. Skip.
302
303                            Entering area of "sysdeps".
304
305                            3. If state is not valid, selector is temporary,
306                               it selects only session which triggered
307                               previous resolution. Key manager will do
308                               something to install a state with proper
309                               selector.
310                          */
311                         if (x->km.state == XFRM_STATE_VALID) {
312                                 if (!xfrm_selector_match(&x->sel, fl, family))
313                                         continue;
                                    /* Prefer a non-dying state; break ties in
                                     * favour of the newer add_time.
                                     */
314                                 if (!best ||
315                                     best->km.dying > x->km.dying ||
316                                     (best->km.dying == x->km.dying &&
317                                      best->curlft.add_time < x->curlft.add_time))
318                                         best = x;
319                         } else if (x->km.state == XFRM_STATE_ACQ) {
320                                 acquire_in_progress = 1;
321                         } else if (x->km.state == XFRM_STATE_ERROR ||
322                                    x->km.state == XFRM_STATE_EXPIRED) {
323                                 if (xfrm_selector_match(&x->sel, fl, family))
324                                         error = 1;
325                         }
326                 }
327         }
328
329         x = best;
330         if (!x && !error && !acquire_in_progress &&
331             ((x = xfrm_state_alloc()) != NULL)) {
332                 /* Initialize temporary selector matching only
333                  * to current session. */
                    /* NOTE(review): the return value (-1 when no afinfo is
                     * registered for the family) is ignored here.
                     */
334                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
335
336                 if (km_query(x, tmpl, pol) == 0) {
                            /* A key manager accepted the request: publish the
                             * placeholder.  Each linkage below (bydst, byspi,
                             * timer) takes its own reference.
                             */
337                         x->km.state = XFRM_STATE_ACQ;
338                         list_add_tail(&x->bydst, xfrm_state_bydst+h);
339                         xfrm_state_hold(x);
340                         if (x->id.spi) {
341                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
342                                 list_add(&x->byspi, xfrm_state_byspi+h);
343                                 xfrm_state_hold(x);
344                         }
345                         x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
346                         xfrm_state_hold(x);
347                         x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
348                         add_timer(&x->timer);
349                 } else {
                            /* No key manager took the request: discard the
                             * never-published temporary state.
                             */
350                         x->km.state = XFRM_STATE_DEAD;
351                         xfrm_state_put(x);
352                         x = NULL;
353                         error = 1;
354                 }
355         }
            /* Reference handed to the caller. */
356         if (x)
357                 xfrm_state_hold(x);
358         else
359                 *err = acquire_in_progress ? -EAGAIN :
360                         (error ? -ESRCH : -ENOMEM);
361         spin_unlock_bh(&xfrm_state_lock);
362         return x;
363 }
364
365 static void __xfrm_state_insert(struct xfrm_state *x)
366 {
367         unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
368
369         list_add(&x->bydst, xfrm_state_bydst+h);
370         xfrm_state_hold(x);
371
372         h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
373
374         list_add(&x->byspi, xfrm_state_byspi+h);
375         xfrm_state_hold(x);
376
377         if (!mod_timer(&x->timer, jiffies + HZ))
378                 xfrm_state_hold(x);
379
380         wake_up(&km_waitq);
381 }
382
383 void xfrm_state_insert(struct xfrm_state *x)
384 {
385         spin_lock_bh(&xfrm_state_lock);
386         __xfrm_state_insert(x);
387         spin_unlock_bh(&xfrm_state_lock);
388 }
389
/* Forward declaration: defined further down in this file, but already
 * needed by xfrm_state_add(), which calls it with xfrm_state_lock held.
 */
390 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
391
392 int xfrm_state_add(struct xfrm_state *x)
393 {
394         struct xfrm_state_afinfo *afinfo;
395         struct xfrm_state *x1;
396         int family;
397         int err;
398
399         family = x->props.family;
400         afinfo = xfrm_state_get_afinfo(family);
401         if (unlikely(afinfo == NULL))
402                 return -EAFNOSUPPORT;
403
404         spin_lock_bh(&xfrm_state_lock);
405
406         x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
407         if (x1) {
408                 xfrm_state_put(x1);
409                 x1 = NULL;
410                 err = -EEXIST;
411                 goto out;
412         }
413
414         if (x->km.seq) {
415                 x1 = __xfrm_find_acq_byseq(x->km.seq);
416                 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
417                         xfrm_state_put(x1);
418                         x1 = NULL;
419                 }
420         }
421
422         if (!x1)
423                 x1 = afinfo->find_acq(
424                         x->props.mode, x->props.reqid, x->id.proto,
425                         &x->id.daddr, &x->props.saddr, 0);
426
427         __xfrm_state_insert(x);
428         err = 0;
429
430 out:
431         spin_unlock_bh(&xfrm_state_lock);
432         xfrm_state_put_afinfo(afinfo);
433
434         if (x1) {
435                 xfrm_state_delete(x1);
436                 xfrm_state_put(x1);
437         }
438
439         return err;
440 }
441
442 int xfrm_state_update(struct xfrm_state *x)
443 {
444         struct xfrm_state_afinfo *afinfo;
445         struct xfrm_state *x1;
446         int err;
447
448         afinfo = xfrm_state_get_afinfo(x->props.family);
449         if (unlikely(afinfo == NULL))
450                 return -EAFNOSUPPORT;
451
452         spin_lock_bh(&xfrm_state_lock);
453         x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
454
455         err = -ESRCH;
456         if (!x1)
457                 goto out;
458
459         if (xfrm_state_kern(x1)) {
460                 xfrm_state_put(x1);
461                 err = -EEXIST;
462                 goto out;
463         }
464
465         if (x1->km.state == XFRM_STATE_ACQ) {
466                 __xfrm_state_insert(x);
467                 x = NULL;
468         }
469         err = 0;
470
471 out:
472         spin_unlock_bh(&xfrm_state_lock);
473         xfrm_state_put_afinfo(afinfo);
474
475         if (err)
476                 return err;
477
478         if (!x) {
479                 xfrm_state_delete(x1);
480                 xfrm_state_put(x1);
481                 return 0;
482         }
483
484         err = -EINVAL;
485         spin_lock_bh(&x1->lock);
486         if (likely(x1->km.state == XFRM_STATE_VALID)) {
487                 if (x->encap && x1->encap)
488                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
489                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
490                 x1->km.dying = 0;
491
492                 if (!mod_timer(&x1->timer, jiffies + HZ))
493                         xfrm_state_hold(x1);
494                 if (x1->curlft.use_time)
495                         xfrm_state_check_expire(x1);
496
497                 err = 0;
498         }
499         spin_unlock_bh(&x1->lock);
500
501         xfrm_state_put(x1);
502
503         return err;
504 }
505
506 int xfrm_state_check_expire(struct xfrm_state *x)
507 {
508         if (!x->curlft.use_time)
509                 x->curlft.use_time = (unsigned long)xtime.tv_sec;
510
511         if (x->km.state != XFRM_STATE_VALID)
512                 return -EINVAL;
513
514         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
515             x->curlft.packets >= x->lft.hard_packet_limit) {
516                 km_state_expired(x, 1);
517                 if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ))
518                         xfrm_state_hold(x);
519                 return -EINVAL;
520         }
521
522         if (!x->km.dying &&
523             (x->curlft.bytes >= x->lft.soft_byte_limit ||
524              x->curlft.packets >= x->lft.soft_packet_limit))
525                 km_state_expired(x, 0);
526         return 0;
527 }
528
529 int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
530 {
531         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
532                 - skb_headroom(skb);
533
534         if (nhead > 0)
535                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
536
537         /* Check tail too... */
538         return 0;
539 }
540
/*
 * Combined check: lifetime limits first, then headroom.  The headroom
 * check is skipped when the expiry check returns a negative value.
 *
 * Returns the first negative result, otherwise the result of
 * xfrm_state_check_space().
 */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int err = xfrm_state_check_expire(x);

	if (err >= 0)
		err = xfrm_state_check_space(x, skb);

	return err;
}
550
551 struct xfrm_state *
552 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
553                   unsigned short family)
554 {
555         struct xfrm_state *x;
556         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
557         if (!afinfo)
558                 return NULL;
559
560         spin_lock_bh(&xfrm_state_lock);
561         x = afinfo->state_lookup(daddr, spi, proto);
562         spin_unlock_bh(&xfrm_state_lock);
563         xfrm_state_put_afinfo(afinfo);
564         return x;
565 }
566
567 struct xfrm_state *
568 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
569               xfrm_address_t *daddr, xfrm_address_t *saddr, 
570               int create, unsigned short family)
571 {
572         struct xfrm_state *x;
573         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
574         if (!afinfo)
575                 return NULL;
576
577         spin_lock_bh(&xfrm_state_lock);
578         x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
579         spin_unlock_bh(&xfrm_state_lock);
580         xfrm_state_put_afinfo(afinfo);
581         return x;
582 }
583
584 /* Silly enough, but I'm too lazy to build a resolution list */
585
586 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
587 {
588         int i;
589         struct xfrm_state *x;
590
591         for (i = 0; i < XFRM_DST_HSIZE; i++) {
592                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
593                         if (x->km.seq == seq) {
594                                 xfrm_state_hold(x);
595                                 return x;
596                         }
597                 }
598         }
599         return NULL;
600 }
601
602 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
603 {
604         struct xfrm_state *x;
605
606         spin_lock_bh(&xfrm_state_lock);
607         x = __xfrm_find_acq_byseq(seq);
608         spin_unlock_bh(&xfrm_state_lock);
609         return x;
610 }
611  
612 u32 xfrm_get_acqseq(void)
613 {
614         u32 res;
615         static u32 acqseq;
616         static spinlock_t acqseq_lock = SPIN_LOCK_UNLOCKED;
617
618         spin_lock_bh(&acqseq_lock);
619         res = (++acqseq ? : ++acqseq);
620         spin_unlock_bh(&acqseq_lock);
621         return res;
622 }
623
624 void
625 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
626 {
627         u32 h;
628         struct xfrm_state *x0;
629
630         if (x->id.spi)
631                 return;
632
633         if (minspi == maxspi) {
634                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
635                 if (x0) {
636                         xfrm_state_put(x0);
637                         return;
638                 }
639                 x->id.spi = minspi;
640         } else {
641                 u32 spi = 0;
642                 minspi = ntohl(minspi);
643                 maxspi = ntohl(maxspi);
644                 for (h=0; h<maxspi-minspi+1; h++) {
645                         spi = minspi + net_random()%(maxspi-minspi+1);
646                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
647                         if (x0 == NULL) {
648                                 x->id.spi = htonl(spi);
649                                 break;
650                         }
651                         xfrm_state_put(x0);
652                 }
653         }
654         if (x->id.spi) {
655                 spin_lock_bh(&xfrm_state_lock);
656                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
657                 list_add(&x->byspi, xfrm_state_byspi+h);
658                 xfrm_state_hold(x);
659                 spin_unlock_bh(&xfrm_state_lock);
660                 wake_up(&km_waitq);
661         }
662 }
663
664 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
665                     void *data)
666 {
667         int i;
668         struct xfrm_state *x;
669         int count = 0;
670         int err = 0;
671
672         spin_lock_bh(&xfrm_state_lock);
673         for (i = 0; i < XFRM_DST_HSIZE; i++) {
674                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
675                         if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
676                                 count++;
677                 }
678         }
679         if (count == 0) {
680                 err = -ENOENT;
681                 goto out;
682         }
683
684         for (i = 0; i < XFRM_DST_HSIZE; i++) {
685                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
686                         if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
687                                 continue;
688                         err = func(x, --count, data);
689                         if (err)
690                                 goto out;
691                 }
692         }
693 out:
694         spin_unlock_bh(&xfrm_state_lock);
695         return err;
696 }
697
698
699 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
700 {
701         u32 diff;
702
703         seq = ntohl(seq);
704
705         if (unlikely(seq == 0))
706                 return -EINVAL;
707
708         if (likely(seq > x->replay.seq))
709                 return 0;
710
711         diff = x->replay.seq - seq;
712         if (diff >= x->props.replay_window) {
713                 x->stats.replay_window++;
714                 return -EINVAL;
715         }
716
717         if (x->replay.bitmap & (1U << diff)) {
718                 x->stats.replay++;
719                 return -EINVAL;
720         }
721         return 0;
722 }
723
724 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
725 {
726         u32 diff;
727
728         seq = ntohl(seq);
729
730         if (seq > x->replay.seq) {
731                 diff = seq - x->replay.seq;
732                 if (diff < x->props.replay_window)
733                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
734                 else
735                         x->replay.bitmap = 1;
736                 x->replay.seq = seq;
737         } else {
738                 diff = x->replay.seq - seq;
739                 x->replay.bitmap |= (1U << diff);
740         }
741 }
742
743 int xfrm_check_selectors(struct xfrm_state **x, int n, struct flowi *fl)
744 {
745         int i;
746
747         for (i=0; i<n; i++) {
748                 int match;
749                 match = xfrm_selector_match(&x[i]->sel, fl, x[i]->props.family);
750                 if (!match)
751                         return -EINVAL;
752         }
753         return 0;
754 }
755
/* Registered key managers (struct xfrm_mgr, linked through ->list).
 * The list is walked under the read lock by the km_* notification
 * helpers and modified under the write lock by
 * xfrm_register_km() / xfrm_unregister_km().
 */
756 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
757 static rwlock_t         xfrm_km_lock = RW_LOCK_UNLOCKED;
758
759 void km_state_expired(struct xfrm_state *x, int hard)
760 {
761         struct xfrm_mgr *km;
762
763         if (hard)
764                 x->km.state = XFRM_STATE_EXPIRED;
765         else
766                 x->km.dying = 1;
767
768         read_lock(&xfrm_km_lock);
769         list_for_each_entry(km, &xfrm_km_list, list)
770                 km->notify(x, hard);
771         read_unlock(&xfrm_km_lock);
772
773         if (hard)
774                 wake_up(&km_waitq);
775 }
776
777 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
778 {
779         int err = -EINVAL;
780         struct xfrm_mgr *km;
781
782         read_lock(&xfrm_km_lock);
783         list_for_each_entry(km, &xfrm_km_list, list) {
784                 err = km->acquire(x, t, pol, XFRM_POLICY_OUT);
785                 if (!err)
786                         break;
787         }
788         read_unlock(&xfrm_km_lock);
789         return err;
790 }
791
792 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
793 {
794         int err = -EINVAL;
795         struct xfrm_mgr *km;
796
797         read_lock(&xfrm_km_lock);
798         list_for_each_entry(km, &xfrm_km_list, list) {
799                 if (km->new_mapping)
800                         err = km->new_mapping(x, ipaddr, sport);
801                 if (!err)
802                         break;
803         }
804         read_unlock(&xfrm_km_lock);
805         return err;
806 }
807
808 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
809 {
810         struct xfrm_mgr *km;
811
812         read_lock(&xfrm_km_lock);
813         list_for_each_entry(km, &xfrm_km_list, list)
814                 if (km->notify_policy)
815                         km->notify_policy(pol, dir, hard);
816         read_unlock(&xfrm_km_lock);
817
818         if (hard)
819                 wake_up(&km_waitq);
820 }
821
822 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
823 {
824         int err;
825         u8 *data;
826         struct xfrm_mgr *km;
827         struct xfrm_policy *pol = NULL;
828
829         if (optlen <= 0 || optlen > PAGE_SIZE)
830                 return -EMSGSIZE;
831
832         data = kmalloc(optlen, GFP_KERNEL);
833         if (!data)
834                 return -ENOMEM;
835
836         err = -EFAULT;
837         if (copy_from_user(data, optval, optlen))
838                 goto out;
839
840         err = -EINVAL;
841         read_lock(&xfrm_km_lock);
842         list_for_each_entry(km, &xfrm_km_list, list) {
843                 pol = km->compile_policy(sk->sk_family, optname, data,
844                                          optlen, &err);
845                 if (err >= 0)
846                         break;
847         }
848         read_unlock(&xfrm_km_lock);
849
850         if (err >= 0) {
851                 xfrm_sk_policy_insert(sk, err, pol);
852                 xfrm_pol_put(pol);
853                 err = 0;
854         }
855
856 out:
857         kfree(data);
858         return err;
859 }
860
861 int xfrm_register_km(struct xfrm_mgr *km)
862 {
863         write_lock_bh(&xfrm_km_lock);
864         list_add_tail(&km->list, &xfrm_km_list);
865         write_unlock_bh(&xfrm_km_lock);
866         return 0;
867 }
868
869 int xfrm_unregister_km(struct xfrm_mgr *km)
870 {
871         write_lock_bh(&xfrm_km_lock);
872         list_del(&km->list);
873         write_unlock_bh(&xfrm_km_lock);
874         return 0;
875 }
876
877 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
878 {
879         int err = 0;
880         if (unlikely(afinfo == NULL))
881                 return -EINVAL;
882         if (unlikely(afinfo->family >= NPROTO))
883                 return -EAFNOSUPPORT;
884         write_lock(&xfrm_state_afinfo_lock);
885         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
886                 err = -ENOBUFS;
887         else {
888                 afinfo->state_bydst = xfrm_state_bydst;
889                 afinfo->state_byspi = xfrm_state_byspi;
890                 xfrm_state_afinfo[afinfo->family] = afinfo;
891         }
892         write_unlock(&xfrm_state_afinfo_lock);
893         return err;
894 }
895
896 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
897 {
898         int err = 0;
899         if (unlikely(afinfo == NULL))
900                 return -EINVAL;
901         if (unlikely(afinfo->family >= NPROTO))
902                 return -EAFNOSUPPORT;
903         write_lock(&xfrm_state_afinfo_lock);
904         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
905                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
906                         err = -EINVAL;
907                 else {
908                         xfrm_state_afinfo[afinfo->family] = NULL;
909                         afinfo->state_byspi = NULL;
910                         afinfo->state_bydst = NULL;
911                 }
912         }
913         write_unlock(&xfrm_state_afinfo_lock);
914         return err;
915 }
916
917 struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
918 {
919         struct xfrm_state_afinfo *afinfo;
920         if (unlikely(family >= NPROTO))
921                 return NULL;
922         read_lock(&xfrm_state_afinfo_lock);
923         afinfo = xfrm_state_afinfo[family];
924         if (likely(afinfo != NULL))
925                 read_lock(&afinfo->lock);
926         read_unlock(&xfrm_state_afinfo_lock);
927         return afinfo;
928 }
929
930 void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
931 {
932         if (unlikely(afinfo == NULL))
933                 return;
934         read_unlock(&afinfo->lock);
935 }
936
937 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
938 void xfrm_state_delete_tunnel(struct xfrm_state *x)
939 {
940         if (x->tunnel) {
941                 struct xfrm_state *t = x->tunnel;
942
943                 if (atomic_read(&t->tunnel_users) == 2)
944                         xfrm_state_delete(t);
945                 atomic_dec(&t->tunnel_users);
946                 xfrm_state_put(t);
947                 x->tunnel = NULL;
948         }
949 }
950
951 void __init xfrm_state_init(void)
952 {
953         int i;
954
955         for (i=0; i<XFRM_DST_HSIZE; i++) {
956                 INIT_LIST_HEAD(&xfrm_state_bydst[i]);
957                 INIT_LIST_HEAD(&xfrm_state_byspi[i]);
958         }
959         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
960 }
961