[linux-2.6.git] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <asm/uaccess.h>
21
22 /* Each xfrm_state may be linked to two tables:
23
24    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
25    2. Hash table by daddr to find what SAs exist for given
26       destination/tunnel endpoint. (output)
27  */
28
29 static spinlock_t xfrm_state_lock = SPIN_LOCK_UNLOCKED;
30
31 /* Hash table to find appropriate SA towards given target (endpoint
32  * of tunnel or destination of transport mode) allowed by selector.
33  *
34  * Main use is finding SA after policy selected tunnel or transport mode.
35  * Also, it can be used by ah/esp icmp error handler to find offending SA.
36  */
37 static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
38 static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
39
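    /* Woken whenever the state database changes (states inserted, expired,
     * or destroyed); presumably so that callers blocked waiting for a
     * larval SA to resolve can re-check.
     */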
40 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
41
42 static rwlock_t xfrm_state_afinfo_lock = RW_LOCK_UNLOCKED;
43 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
44
45 static struct work_struct xfrm_state_gc_work;
46 static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
47 static spinlock_t xfrm_state_gc_lock = SPIN_LOCK_UNLOCKED;
48
49 static void __xfrm_state_delete(struct xfrm_state *x);
50
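    /* Final teardown of a dead state.  Runs from the GC work queue with no
     * locks held: the timer must already be stopped, the algorithm and
     * encapsulation data are freed, the type module reference is dropped,
     * and km_waitq is woken so any waiters can re-check.
     */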
51 static void xfrm_state_gc_destroy(struct xfrm_state *x)
52 {
53         if (del_timer(&x->timer))
54                 BUG();
55         if (x->aalg)
56                 kfree(x->aalg);
57         if (x->ealg)
58                 kfree(x->ealg);
59         if (x->calg)
60                 kfree(x->calg);
61         if (x->encap)
62                 kfree(x->encap);
63         if (x->type) {
64                 x->type->destructor(x);
65                 xfrm_put_type(x->type);
66         }
67         kfree(x);
68         wake_up(&km_waitq);
69 }
70
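    /* Work function for deferred destruction.  __xfrm_state_destroy()
     * queues dead states on xfrm_state_gc_list; here the list is spliced
     * onto a private list under xfrm_state_gc_lock and each entry is
     * destroyed outside of atomic context.
     */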
71 static void xfrm_state_gc_task(void *data)
72 {
73         struct xfrm_state *x;
74         struct list_head *entry, *tmp;
75         struct list_head gc_list = LIST_HEAD_INIT(gc_list);
76
77         spin_lock_bh(&xfrm_state_gc_lock);
78         list_splice_init(&xfrm_state_gc_list, &gc_list);
79         spin_unlock_bh(&xfrm_state_gc_lock);
80
81         list_for_each_safe(entry, tmp, &gc_list) {
82                 x = list_entry(entry, struct xfrm_state, bydst);
83                 xfrm_state_gc_destroy(x);
84         }
85 }
86
87 static inline unsigned long make_jiffies(long secs)
88 {
89         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
90                 return MAX_SCHEDULE_TIMEOUT-1;
91         else
92                 return secs*HZ;
93 }
94
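    /* Per-state lifetime timer.  Computes the nearest soft/hard add/use
     * expiry, notifies the key manager on soft expiry (km_state_expired(x, 0)),
     * deletes the state on hard expiry, and otherwise re-arms itself for
     * the next deadline, taking an extra reference while the timer is
     * pending.
     */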
95 static void xfrm_timer_handler(unsigned long data)
96 {
97         struct xfrm_state *x = (struct xfrm_state*)data;
98         unsigned long now = (unsigned long)xtime.tv_sec;
99         long next = LONG_MAX;
100         int warn = 0;
101
102         spin_lock(&x->lock);
103         if (x->km.state == XFRM_STATE_DEAD)
104                 goto out;
105         if (x->km.state == XFRM_STATE_EXPIRED)
106                 goto expired;
107         if (x->lft.hard_add_expires_seconds) {
108                 long tmo = x->lft.hard_add_expires_seconds +
109                         x->curlft.add_time - now;
110                 if (tmo <= 0)
111                         goto expired;
112                 if (tmo < next)
113                         next = tmo;
114         }
115         if (x->lft.hard_use_expires_seconds) {
116                 long tmo = x->lft.hard_use_expires_seconds +
117                         (x->curlft.use_time ? : now) - now;
118                 if (tmo <= 0)
119                         goto expired;
120                 if (tmo < next)
121                         next = tmo;
122         }
123         if (x->km.dying)
124                 goto resched;
125         if (x->lft.soft_add_expires_seconds) {
126                 long tmo = x->lft.soft_add_expires_seconds +
127                         x->curlft.add_time - now;
128                 if (tmo <= 0)
129                         warn = 1;
130                 else if (tmo < next)
131                         next = tmo;
132         }
133         if (x->lft.soft_use_expires_seconds) {
134                 long tmo = x->lft.soft_use_expires_seconds +
135                         (x->curlft.use_time ? : now) - now;
136                 if (tmo <= 0)
137                         warn = 1;
138                 else if (tmo < next)
139                         next = tmo;
140         }
141
142         if (warn)
143                 km_state_expired(x, 0);
144 resched:
145         if (next != LONG_MAX &&
146             !mod_timer(&x->timer, jiffies + make_jiffies(next)))
147                 xfrm_state_hold(x);
148         goto out;
149
150 expired:
151         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
152                 x->km.state = XFRM_STATE_EXPIRED;
153                 wake_up(&km_waitq);
154                 next = 2;
155                 goto resched;
156         }
157         if (x->id.spi != 0)
158                 km_state_expired(x, 1);
159         __xfrm_state_delete(x);
160
161 out:
162         spin_unlock(&x->lock);
163         xfrm_state_put(x);
164 }
165
166 struct xfrm_state *xfrm_state_alloc(void)
167 {
168         struct xfrm_state *x;
169
170         x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
171
172         if (x) {
173                 memset(x, 0, sizeof(struct xfrm_state));
174                 atomic_set(&x->refcnt, 1);
175                 atomic_set(&x->tunnel_users, 0);
176                 INIT_LIST_HEAD(&x->bydst);
177                 INIT_LIST_HEAD(&x->byspi);
178                 init_timer(&x->timer);
179                 x->timer.function = xfrm_timer_handler;
180                 x->timer.data     = (unsigned long)x;
181                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
182                 x->lft.soft_byte_limit = XFRM_INF;
183                 x->lft.soft_packet_limit = XFRM_INF;
184                 x->lft.hard_byte_limit = XFRM_INF;
185                 x->lft.hard_packet_limit = XFRM_INF;
186                 x->lock = SPIN_LOCK_UNLOCKED;
187         }
188         return x;
189 }
190
191 void __xfrm_state_destroy(struct xfrm_state *x)
192 {
193         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
194
195         spin_lock_bh(&xfrm_state_gc_lock);
196         list_add(&x->bydst, &xfrm_state_gc_list);
197         spin_unlock_bh(&xfrm_state_gc_lock);
198         schedule_work(&xfrm_state_gc_work);
199 }
200
201 static void __xfrm_state_delete(struct xfrm_state *x)
202 {
203         if (x->km.state != XFRM_STATE_DEAD) {
204                 x->km.state = XFRM_STATE_DEAD;
205                 spin_lock(&xfrm_state_lock);
206                 list_del(&x->bydst);
207                 atomic_dec(&x->refcnt);
208                 if (x->id.spi) {
209                         list_del(&x->byspi);
210                         atomic_dec(&x->refcnt);
211                 }
212                 spin_unlock(&xfrm_state_lock);
213                 if (del_timer(&x->timer))
214                         atomic_dec(&x->refcnt);
215
216                 /* The number two in this test is the reference
217                  * mentioned in the comment below plus the reference
218                  * our caller holds.  A larger value means that
219                  * there are DSTs attached to this xfrm_state.
220                  */
221                 if (atomic_read(&x->refcnt) > 2)
222                         xfrm_flush_bundles();
223
224                 /* All xfrm_state objects are created by one of two possible
225                  * paths:
226                  *
     * 1) xfrm_state_alloc  --> xfrm_state_insert
 227  * 2) xfrm_state_lookup --> xfrm_state_insert
228                  *
229                  * The xfrm_state_lookup or xfrm_state_alloc call gives a
230                  * reference, and that is what we are dropping here.
231                  */
232                 atomic_dec(&x->refcnt);
233         }
234 }
235
236 void xfrm_state_delete(struct xfrm_state *x)
237 {
238         xfrm_state_delete_tunnel(x);
239         spin_lock_bh(&x->lock);
240         __xfrm_state_delete(x);
241         spin_unlock_bh(&x->lock);
242 }
243
244 void xfrm_state_flush(u8 proto)
245 {
246         int i;
247         struct xfrm_state *x;
248
249         spin_lock_bh(&xfrm_state_lock);
250         for (i = 0; i < XFRM_DST_HSIZE; i++) {
251 restart:
252                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
253                         if (!xfrm_state_kern(x) &&
254                             (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
255                                 xfrm_state_hold(x);
256                                 spin_unlock_bh(&xfrm_state_lock);
257
258                                 xfrm_state_delete(x);
259                                 xfrm_state_put(x);
260
261                                 spin_lock_bh(&xfrm_state_lock);
262                                 goto restart;
263                         }
264                 }
265         }
266         spin_unlock_bh(&xfrm_state_lock);
267         wake_up(&km_waitq);
268 }
269
270 static int
271 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
272                   struct xfrm_tmpl *tmpl,
273                   xfrm_address_t *daddr, xfrm_address_t *saddr,
274                   unsigned short family)
275 {
276         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
277         if (!afinfo)
278                 return -1;
279         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
280         xfrm_state_put_afinfo(afinfo);
281         return 0;
282 }
283
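    /* Output-path SA resolution.  Walk the by-destination hash chain for a
     * state matching the template; among VALID states whose selector
     * matches the flow, prefer one that is not dying and was added most
     * recently.  If nothing matches and no acquire is already in progress,
     * allocate a larval (XFRM_STATE_ACQ) state and ask the key managers
     * via km_query().
     */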
284 struct xfrm_state *
285 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
286                 struct flowi *fl, struct xfrm_tmpl *tmpl,
287                 struct xfrm_policy *pol, int *err,
288                 unsigned short family)
289 {
290         unsigned h = xfrm_dst_hash(daddr, family);
291         struct xfrm_state *x;
292         int acquire_in_progress = 0;
293         int error = 0;
294         struct xfrm_state *best = NULL;
295
296         spin_lock_bh(&xfrm_state_lock);
297         list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
298                 if (x->props.family == family &&
299                     x->props.reqid == tmpl->reqid &&
300                     xfrm_state_addr_check(x, daddr, saddr, family) &&
301                     tmpl->mode == x->props.mode &&
302                     tmpl->id.proto == x->id.proto) {
303                         /* Resolution logic:
304                            1. There is a valid state with matching selector.
305                               Done.
306                            2. Valid state with inappropriate selector. Skip.
307
308                            Entering area of "sysdeps".
309
 310                            3. If the state is not valid, its selector is
 311                               temporary and matches only the session that
 312                               triggered the previous resolution. The key
 313                               manager is expected to install a state with
 314                               the proper selector.
315                          */
316                         if (x->km.state == XFRM_STATE_VALID) {
317                                 if (!xfrm_selector_match(&x->sel, fl, family))
318                                         continue;
319                                 if (!best ||
320                                     best->km.dying > x->km.dying ||
321                                     (best->km.dying == x->km.dying &&
322                                      best->curlft.add_time < x->curlft.add_time))
323                                         best = x;
324                         } else if (x->km.state == XFRM_STATE_ACQ) {
325                                 acquire_in_progress = 1;
326                         } else if (x->km.state == XFRM_STATE_ERROR ||
327                                    x->km.state == XFRM_STATE_EXPIRED) {
328                                 if (xfrm_selector_match(&x->sel, fl, family))
329                                         error = 1;
330                         }
331                 }
332         }
333
334         if (best) {
335                 xfrm_state_hold(best);
336                 spin_unlock_bh(&xfrm_state_lock);
337                 return best;
338         }
339
340         x = NULL;
341         if (!error && !acquire_in_progress &&
342             ((x = xfrm_state_alloc()) != NULL)) {
343                 /* Initialize temporary selector matching only
344                  * to current session. */
345                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
346
347                 if (km_query(x, tmpl, pol) == 0) {
348                         x->km.state = XFRM_STATE_ACQ;
349                         list_add_tail(&x->bydst, xfrm_state_bydst+h);
350                         xfrm_state_hold(x);
351                         if (x->id.spi) {
352                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
353                                 list_add(&x->byspi, xfrm_state_byspi+h);
354                                 xfrm_state_hold(x);
355                         }
356                         x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
357                         xfrm_state_hold(x);
 358                         mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ);
359                 } else {
360                         x->km.state = XFRM_STATE_DEAD;
361                         xfrm_state_put(x);
362                         x = NULL;
363                         error = 1;
364                 }
365         }
366         spin_unlock_bh(&xfrm_state_lock);
367         if (!x)
368                 *err = acquire_in_progress ? -EAGAIN :
369                         (error ? -ESRCH : -ENOMEM);
370         return x;
371 }
372
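    /* Link a state into both hash tables, taking a reference for each
     * list, and arm its timer one second out so that the lifetime timer
     * runs shortly after insertion.
     */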
373 static void __xfrm_state_insert(struct xfrm_state *x)
374 {
375         unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
376
377         list_add(&x->bydst, xfrm_state_bydst+h);
378         xfrm_state_hold(x);
379
380         h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
381
382         list_add(&x->byspi, xfrm_state_byspi+h);
383         xfrm_state_hold(x);
384
385         if (!mod_timer(&x->timer, jiffies + HZ))
386                 xfrm_state_hold(x);
387
388         wake_up(&km_waitq);
389 }
390
391 void xfrm_state_insert(struct xfrm_state *x)
392 {
393         spin_lock_bh(&xfrm_state_lock);
394         __xfrm_state_insert(x);
395         spin_unlock_bh(&xfrm_state_lock);
396 }
397
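    /* Add a fully specified state.  Fails with -EEXIST if a state with the
     * same (daddr, spi, proto) already exists; a matching larval (ACQ)
     * entry with no SPI does not block the add and is deleted once the
     * new state has been inserted.
     */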
398 int xfrm_state_add(struct xfrm_state *x)
399 {
400         struct xfrm_state_afinfo *afinfo;
401         struct xfrm_state *x1;
402         int err;
403
404         afinfo = xfrm_state_get_afinfo(x->props.family);
405         if (unlikely(afinfo == NULL))
406                 return -EAFNOSUPPORT;
407
408         spin_lock_bh(&xfrm_state_lock);
409
410         x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
411         if (!x1) {
412                 x1 = afinfo->find_acq(
413                         x->props.mode, x->props.reqid, x->id.proto,
414                         &x->id.daddr, &x->props.saddr, 0);
415                 if (x1 && x1->id.spi != x->id.spi && x1->id.spi) {
416                         xfrm_state_put(x1);
417                         x1 = NULL;
418                 }
419         }
420
421         if (x1 && x1->id.spi) {
422                 xfrm_state_put(x1);
423                 x1 = NULL;
424                 err = -EEXIST;
425                 goto out;
426         }
427
428         __xfrm_state_insert(x);
429         err = 0;
430
431 out:
432         spin_unlock_bh(&xfrm_state_lock);
433         xfrm_state_put_afinfo(afinfo);
434
435         if (x1) {
436                 xfrm_state_delete(x1);
437                 xfrm_state_put(x1);
438         }
439
440         return err;
441 }
442
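    /* Update an existing state.  If the matching entry is still a larval
     * (ACQ) state, the new state simply replaces it; otherwise the
     * existing VALID state keeps its identity and only its lifetime (and
     * NAT-T encapsulation, when both states carry one) is refreshed in
     * place.
     */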
443 int xfrm_state_update(struct xfrm_state *x)
444 {
445         struct xfrm_state_afinfo *afinfo;
446         struct xfrm_state *x1;
447         int err;
448
449         afinfo = xfrm_state_get_afinfo(x->props.family);
450         if (unlikely(afinfo == NULL))
451                 return -EAFNOSUPPORT;
452
453         spin_lock_bh(&xfrm_state_lock);
454         x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
455
456         err = -ESRCH;
457         if (!x1)
458                 goto out;
459
460         if (xfrm_state_kern(x1)) {
461                 xfrm_state_put(x1);
462                 err = -EEXIST;
463                 goto out;
464         }
465
466         if (x1->km.state == XFRM_STATE_ACQ) {
467                 __xfrm_state_insert(x);
468                 x = NULL;
469         }
470         err = 0;
471
472 out:
473         spin_unlock_bh(&xfrm_state_lock);
474         xfrm_state_put_afinfo(afinfo);
475
476         if (err)
477                 return err;
478
479         if (!x) {
480                 xfrm_state_delete(x1);
481                 xfrm_state_put(x1);
482                 return 0;
483         }
484
485         err = -EINVAL;
486         spin_lock_bh(&x1->lock);
487         if (likely(x1->km.state == XFRM_STATE_VALID)) {
488                 if (x->encap && x1->encap)
489                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
490                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
491                 x1->km.dying = 0;
492                 err = 0;
493         }
494         spin_unlock_bh(&x1->lock);
495
496         if (!mod_timer(&x1->timer, jiffies + HZ))
497                 xfrm_state_hold(x1);
498         if (x1->curlft.use_time)
499                 xfrm_state_check_expire(x1);
500
501         xfrm_state_put(x1);
502
503         return err;
504 }
505
506 int xfrm_state_check_expire(struct xfrm_state *x)
507 {
508         if (!x->curlft.use_time)
509                 x->curlft.use_time = (unsigned long)xtime.tv_sec;
510
511         if (x->km.state != XFRM_STATE_VALID)
512                 return -EINVAL;
513
514         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
515             x->curlft.packets >= x->lft.hard_packet_limit) {
516                 km_state_expired(x, 1);
517                 if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ))
518                         xfrm_state_hold(x);
519                 return -EINVAL;
520         }
521
522         if (!x->km.dying &&
523             (x->curlft.bytes >= x->lft.soft_byte_limit ||
524              x->curlft.packets >= x->lft.soft_packet_limit))
525                 km_state_expired(x, 0);
526         return 0;
527 }
528
529 int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
530 {
531         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
532                 - skb_headroom(skb);
533
534         if (nhead > 0)
535                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
536
537         /* Check tail too... */
538         return 0;
539 }
540
541 struct xfrm_state *
542 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
543                   unsigned short family)
544 {
545         struct xfrm_state *x;
546         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
547         if (!afinfo)
548                 return NULL;
549
550         spin_lock_bh(&xfrm_state_lock);
551         x = afinfo->state_lookup(daddr, spi, proto);
552         spin_unlock_bh(&xfrm_state_lock);
553         xfrm_state_put_afinfo(afinfo);
554         return x;
555 }
556
557 struct xfrm_state *
558 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
559               xfrm_address_t *daddr, xfrm_address_t *saddr, 
560               int create, unsigned short family)
561 {
562         struct xfrm_state *x;
563         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
564         if (!afinfo)
565                 return NULL;
566
567         spin_lock_bh(&xfrm_state_lock);
568         x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
569         spin_unlock_bh(&xfrm_state_lock);
570         xfrm_state_put_afinfo(afinfo);
571         return x;
572 }
573
 574 /* Silly enough, but I'm too lazy to build a resolution list. */
575
576 struct xfrm_state * xfrm_find_acq_byseq(u32 seq)
577 {
578         int i;
579         struct xfrm_state *x;
580
581         spin_lock_bh(&xfrm_state_lock);
582         for (i = 0; i < XFRM_DST_HSIZE; i++) {
583                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
584                         if (x->km.seq == seq) {
585                                 xfrm_state_hold(x);
586                                 spin_unlock_bh(&xfrm_state_lock);
587                                 return x;
588                         }
589                 }
590         }
591         spin_unlock_bh(&xfrm_state_lock);
592         return NULL;
593 }
594  
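    /* Hand out a non-zero acquire sequence number.  The GCC "x ? : y"
     * conditional makes the counter skip zero when it wraps around.
     */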
595 u32 xfrm_get_acqseq(void)
596 {
597         u32 res;
598         static u32 acqseq;
599         static spinlock_t acqseq_lock = SPIN_LOCK_UNLOCKED;
600
601         spin_lock_bh(&acqseq_lock);
602         res = (++acqseq ? : ++acqseq);
603         spin_unlock_bh(&acqseq_lock);
604         return res;
605 }
606
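    /* Assign an SPI to a state that does not have one yet.  With
     * minspi == maxspi the caller requests that exact value; otherwise a
     * random value in [minspi, maxspi] (host byte order) is tried up to
     * (maxspi - minspi + 1) times until an unused one is found.  On
     * success the state is linked into the by-SPI hash and km_waitq is
     * woken.
     */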
607 void
608 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
609 {
610         u32 h;
611         struct xfrm_state *x0;
612
613         if (x->id.spi)
614                 return;
615
616         if (minspi == maxspi) {
617                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
618                 if (x0) {
619                         xfrm_state_put(x0);
620                         return;
621                 }
622                 x->id.spi = minspi;
623         } else {
624                 u32 spi = 0;
625                 minspi = ntohl(minspi);
626                 maxspi = ntohl(maxspi);
627                 for (h=0; h<maxspi-minspi+1; h++) {
628                         spi = minspi + net_random()%(maxspi-minspi+1);
629                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
630                         if (x0 == NULL)
631                                 break;
632                         xfrm_state_put(x0);
633                 }
634                 x->id.spi = htonl(spi);
635         }
636         if (x->id.spi) {
637                 spin_lock_bh(&xfrm_state_lock);
638                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
639                 list_add(&x->byspi, xfrm_state_byspi+h);
640                 xfrm_state_hold(x);
641                 spin_unlock_bh(&xfrm_state_lock);
642                 wake_up(&km_waitq);
643         }
644 }
645
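    /* Iterate over every state of the given protocol under
     * xfrm_state_lock.  The matches are counted first so that func()
     * receives a descending index (count-1 down to 0); -ENOENT is
     * returned if nothing matches.
     */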
646 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
647                     void *data)
648 {
649         int i;
650         struct xfrm_state *x;
651         int count = 0;
652         int err = 0;
653
654         spin_lock_bh(&xfrm_state_lock);
655         for (i = 0; i < XFRM_DST_HSIZE; i++) {
656                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
657                         if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
658                                 count++;
659                 }
660         }
661         if (count == 0) {
662                 err = -ENOENT;
663                 goto out;
664         }
665
666         for (i = 0; i < XFRM_DST_HSIZE; i++) {
667                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
668                         if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
669                                 continue;
670                         err = func(x, --count, data);
671                         if (err)
672                                 goto out;
673                 }
674         }
675 out:
676         spin_unlock_bh(&xfrm_state_lock);
677         return err;
678 }
679
680
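    /* Anti-replay window helpers.  x->replay.seq is the highest sequence
     * number accepted so far and bit N of x->replay.bitmap marks whether
     * (seq - N) has been seen.  xfrm_replay_check() rejects packets that
     * fall outside the window or are already marked; xfrm_replay_advance()
     * shifts the window forward (or sets the corresponding bit) once a
     * packet has been accepted.
     */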
681 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
682 {
683         u32 diff;
684
685         seq = ntohl(seq);
686
687         if (unlikely(seq == 0))
688                 return -EINVAL;
689
690         if (likely(seq > x->replay.seq))
691                 return 0;
692
693         diff = x->replay.seq - seq;
694         if (diff >= x->props.replay_window) {
695                 x->stats.replay_window++;
696                 return -EINVAL;
697         }
698
699         if (x->replay.bitmap & (1U << diff)) {
700                 x->stats.replay++;
701                 return -EINVAL;
702         }
703         return 0;
704 }
705
706 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
707 {
708         u32 diff;
709
710         seq = ntohl(seq);
711
712         if (seq > x->replay.seq) {
713                 diff = seq - x->replay.seq;
714                 if (diff < x->props.replay_window)
715                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
716                 else
717                         x->replay.bitmap = 1;
718                 x->replay.seq = seq;
719         } else {
720                 diff = x->replay.seq - seq;
721                 x->replay.bitmap |= (1U << diff);
722         }
723 }
724
725 int xfrm_check_selectors(struct xfrm_state **x, int n, struct flowi *fl)
726 {
727         int i;
728
729         for (i=0; i<n; i++) {
730                 int match;
731                 match = xfrm_selector_match(&x[i]->sel, fl, x[i]->props.family);
732                 if (!match)
733                         return -EINVAL;
734         }
735         return 0;
736 }
737
738 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
739 static rwlock_t         xfrm_km_lock = RW_LOCK_UNLOCKED;
740
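    /* Key-manager notification.  A soft expiry (hard == 0) only marks the
     * state as dying; a hard expiry marks it XFRM_STATE_EXPIRED and wakes
     * km_waitq.  In both cases every registered xfrm_mgr is notified.
     */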
741 void km_state_expired(struct xfrm_state *x, int hard)
742 {
743         struct xfrm_mgr *km;
744
745         if (hard)
746                 x->km.state = XFRM_STATE_EXPIRED;
747         else
748                 x->km.dying = 1;
749
750         read_lock(&xfrm_km_lock);
751         list_for_each_entry(km, &xfrm_km_list, list)
752                 km->notify(x, hard);
753         read_unlock(&xfrm_km_lock);
754
755         if (hard)
756                 wake_up(&km_waitq);
757 }
758
759 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
760 {
761         int err = -EINVAL;
762         struct xfrm_mgr *km;
763
764         read_lock(&xfrm_km_lock);
765         list_for_each_entry(km, &xfrm_km_list, list) {
766                 err = km->acquire(x, t, pol, XFRM_POLICY_OUT);
767                 if (!err)
768                         break;
769         }
770         read_unlock(&xfrm_km_lock);
771         return err;
772 }
773
774 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
775 {
776         int err = -EINVAL;
777         struct xfrm_mgr *km;
778
779         read_lock(&xfrm_km_lock);
780         list_for_each_entry(km, &xfrm_km_list, list) {
781                 if (km->new_mapping)
782                         err = km->new_mapping(x, ipaddr, sport);
783                 if (!err)
784                         break;
785         }
786         read_unlock(&xfrm_km_lock);
787         return err;
788 }
789
790 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
791 {
792         struct xfrm_mgr *km;
793
794         read_lock(&xfrm_km_lock);
795         list_for_each_entry(km, &xfrm_km_list, list)
796                 if (km->notify_policy)
797                         km->notify_policy(pol, dir, hard);
798         read_unlock(&xfrm_km_lock);
799
800         if (hard)
801                 wake_up(&km_waitq);
802 }
803
804 int xfrm_user_policy(struct sock *sk, int optname, u8 *optval, int optlen)
805 {
806         int err;
807         u8 *data;
808         struct xfrm_mgr *km;
809         struct xfrm_policy *pol = NULL;
810
811         if (optlen <= 0 || optlen > PAGE_SIZE)
812                 return -EMSGSIZE;
813
814         data = kmalloc(optlen, GFP_KERNEL);
815         if (!data)
816                 return -ENOMEM;
817
818         err = -EFAULT;
819         if (copy_from_user(data, optval, optlen))
820                 goto out;
821
822         err = -EINVAL;
823         read_lock(&xfrm_km_lock);
824         list_for_each_entry(km, &xfrm_km_list, list) {
825                 pol = km->compile_policy(sk->sk_family, optname, data,
826                                          optlen, &err);
827                 if (err >= 0)
828                         break;
829         }
830         read_unlock(&xfrm_km_lock);
831
832         if (err >= 0) {
833                 xfrm_sk_policy_insert(sk, err, pol);
834                 xfrm_pol_put(pol);
835                 err = 0;
836         }
837
838 out:
839         kfree(data);
840         return err;
841 }
842
843 int xfrm_register_km(struct xfrm_mgr *km)
844 {
845         write_lock_bh(&xfrm_km_lock);
846         list_add_tail(&km->list, &xfrm_km_list);
847         write_unlock_bh(&xfrm_km_lock);
848         return 0;
849 }
850
851 int xfrm_unregister_km(struct xfrm_mgr *km)
852 {
853         write_lock_bh(&xfrm_km_lock);
854         list_del(&km->list);
855         write_unlock_bh(&xfrm_km_lock);
856         return 0;
857 }
858
859 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
860 {
861         int err = 0;
862         if (unlikely(afinfo == NULL))
863                 return -EINVAL;
864         if (unlikely(afinfo->family >= NPROTO))
865                 return -EAFNOSUPPORT;
866         write_lock(&xfrm_state_afinfo_lock);
867         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
868                 err = -ENOBUFS;
869         else {
870                 afinfo->state_bydst = xfrm_state_bydst;
871                 afinfo->state_byspi = xfrm_state_byspi;
872                 xfrm_state_afinfo[afinfo->family] = afinfo;
873         }
874         write_unlock(&xfrm_state_afinfo_lock);
875         return err;
876 }
877
878 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
879 {
880         int err = 0;
881         if (unlikely(afinfo == NULL))
882                 return -EINVAL;
883         if (unlikely(afinfo->family >= NPROTO))
884                 return -EAFNOSUPPORT;
885         write_lock(&xfrm_state_afinfo_lock);
886         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
887                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
888                         err = -EINVAL;
889                 else {
890                         xfrm_state_afinfo[afinfo->family] = NULL;
891                         afinfo->state_byspi = NULL;
892                         afinfo->state_bydst = NULL;
893                 }
894         }
895         write_unlock(&xfrm_state_afinfo_lock);
896         return err;
897 }
898
899 struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
900 {
901         struct xfrm_state_afinfo *afinfo;
902         if (unlikely(family >= NPROTO))
903                 return NULL;
904         read_lock(&xfrm_state_afinfo_lock);
905         afinfo = xfrm_state_afinfo[family];
906         if (likely(afinfo != NULL))
907                 read_lock(&afinfo->lock);
908         read_unlock(&xfrm_state_afinfo_lock);
909         return afinfo;
910 }
911
912 void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
913 {
914         if (unlikely(afinfo == NULL))
915                 return;
916         read_unlock(&afinfo->lock);
917 }
918
919 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
920 void xfrm_state_delete_tunnel(struct xfrm_state *x)
921 {
922         if (x->tunnel) {
923                 struct xfrm_state *t = x->tunnel;
924
925                 if (atomic_read(&t->tunnel_users) == 2)
926                         xfrm_state_delete(t);
927                 atomic_dec(&t->tunnel_users);
928                 xfrm_state_put(t);
929                 x->tunnel = NULL;
930         }
931 }
932
933 void __init xfrm_state_init(void)
934 {
935         int i;
936
937         for (i=0; i<XFRM_DST_HSIZE; i++) {
938                 INIT_LIST_HEAD(&xfrm_state_bydst[i]);
939                 INIT_LIST_HEAD(&xfrm_state_byspi[i]);
940         }
941         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
942 }
943