VServer 1.9.2 (patch-2.6.8.1-vs1.9.2.diff)
[linux-2.6.git] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *      
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <asm/uaccess.h>
21
22 /* Each xfrm_state may be linked to two tables:
23
24    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
25    2. Hash table by daddr to find what SAs exist for given
26       destination/tunnel endpoint. (output)
27  */
28
/* Serializes all insertions/removals on the bydst/byspi hash tables. */
static spinlock_t xfrm_state_lock = SPIN_LOCK_UNLOCKED;

/* Hash table to find appropriate SA towards given target (endpoint
 * of tunnel or destination of transport mode) allowed by selector.
 *
 * Main use is finding SA after policy selected tunnel or transport mode.
 * Also, it can be used by ah/esp icmp error handler to find offending SA.
 */
static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
/* Second index: SAs hashed by (daddr, spi, proto) for input-side lookup. */
static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];

/* Key managers sleep here waiting for state/policy events. */
DECLARE_WAIT_QUEUE_HEAD(km_waitq);

/* Protects the per-family afinfo registration table below. */
static rwlock_t xfrm_state_afinfo_lock = RW_LOCK_UNLOCKED;
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];

/* Deferred destruction: dead states are queued on xfrm_state_gc_list
 * (under xfrm_state_gc_lock) and freed from process context by
 * xfrm_state_gc_work. */
static struct work_struct xfrm_state_gc_work;
static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
static spinlock_t xfrm_state_gc_lock = SPIN_LOCK_UNLOCKED;

static void __xfrm_state_delete(struct xfrm_state *x);
50
51 static void xfrm_state_gc_destroy(struct xfrm_state *x)
52 {
53         if (del_timer(&x->timer))
54                 BUG();
55         if (x->aalg)
56                 kfree(x->aalg);
57         if (x->ealg)
58                 kfree(x->ealg);
59         if (x->calg)
60                 kfree(x->calg);
61         if (x->encap)
62                 kfree(x->encap);
63         if (x->type) {
64                 x->type->destructor(x);
65                 xfrm_put_type(x->type);
66         }
67         kfree(x);
68 }
69
/* Workqueue handler: splice the pending GC list off under the lock,
 * then destroy each dead state without holding any lock.  Waiters on
 * km_waitq are woken so they can observe the state count changing. */
static void xfrm_state_gc_task(void *data)
{
	struct xfrm_state *x;
	struct list_head *entry, *tmp;
	struct list_head gc_list = LIST_HEAD_INIT(gc_list);

	spin_lock_bh(&xfrm_state_gc_lock);
	list_splice_init(&xfrm_state_gc_list, &gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);

	list_for_each_safe(entry, tmp, &gc_list) {
		/* Dead states are queued via their (now unused) bydst link;
		 * see __xfrm_state_destroy(). */
		x = list_entry(entry, struct xfrm_state, bydst);
		xfrm_state_gc_destroy(x);
	}
	wake_up(&km_waitq);
}
86
87 static inline unsigned long make_jiffies(long secs)
88 {
89         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
90                 return MAX_SCHEDULE_TIMEOUT-1;
91         else
92                 return secs*HZ;
93 }
94
/* Per-state expiry timer.  Runs with a reference held on @x (dropped at
 * the end).  Computes the nearest hard/soft lifetime deadline, notifies
 * the key manager on soft expiry, and deletes the state on hard expiry.
 */
static void xfrm_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state*)data;
	unsigned long now = (unsigned long)xtime.tv_sec;
	long next = LONG_MAX;	/* seconds until the nearest deadline */
	int warn = 0;		/* set if a soft limit has been crossed */

	spin_lock(&x->lock);
	if (x->km.state == XFRM_STATE_DEAD)
		goto out;
	if (x->km.state == XFRM_STATE_EXPIRED)
		goto expired;
	if (x->lft.hard_add_expires_seconds) {
		long tmo = x->lft.hard_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (x->lft.hard_use_expires_seconds) {
		/* use_time == 0 means "never used yet"; count from now. */
		long tmo = x->lft.hard_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	/* Soft limits only fire once: km.dying stays set afterwards. */
	if (x->km.dying)
		goto resched;
	if (x->lft.soft_add_expires_seconds) {
		long tmo = x->lft.soft_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}
	if (x->lft.soft_use_expires_seconds) {
		long tmo = x->lft.soft_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_state_expired(x, 0);
resched:
	/* Re-arm the timer; take a new ref only if it was not pending. */
	if (next != LONG_MAX &&
	    !mod_timer(&x->timer, jiffies + make_jiffies(next)))
		xfrm_state_hold(x);
	goto out;

expired:
	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
		/* Larval (ACQUIRE) state: mark expired, let the key
		 * manager react, and retry deletion shortly. */
		x->km.state = XFRM_STATE_EXPIRED;
		wake_up(&km_waitq);
		next = 2;
		goto resched;
	}
	if (x->id.spi != 0)
		km_state_expired(x, 1);
	__xfrm_state_delete(x);

out:
	spin_unlock(&x->lock);
	xfrm_state_put(x);
}
165
/* Allocate and zero-initialize a new xfrm_state.  The caller receives
 * the initial reference (refcnt == 1).  Byte/packet limits default to
 * "infinite"; the expiry timer is initialized but not armed.  Returns
 * NULL on allocation failure (GFP_ATOMIC, so callable in softirq). */
struct xfrm_state *xfrm_state_alloc(void)
{
	struct xfrm_state *x;

	x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);

	if (x) {
		memset(x, 0, sizeof(struct xfrm_state));
		atomic_set(&x->refcnt, 1);
		atomic_set(&x->tunnel_users, 0);
		INIT_LIST_HEAD(&x->bydst);
		INIT_LIST_HEAD(&x->byspi);
		init_timer(&x->timer);
		x->timer.function = xfrm_timer_handler;
		x->timer.data     = (unsigned long)x;
		x->curlft.add_time = (unsigned long)xtime.tv_sec;
		x->lft.soft_byte_limit = XFRM_INF;
		x->lft.soft_packet_limit = XFRM_INF;
		x->lft.hard_byte_limit = XFRM_INF;
		x->lft.hard_packet_limit = XFRM_INF;
		x->lock = SPIN_LOCK_UNLOCKED;
	}
	return x;
}
190
/* Called when the last reference is dropped.  The state must already be
 * DEAD; queue it for deferred destruction on the GC list (reusing its
 * bydst link, which is off the hash tables by now) and kick the worker. */
void __xfrm_state_destroy(struct xfrm_state *x)
{
	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);

	spin_lock_bh(&xfrm_state_gc_lock);
	list_add(&x->bydst, &xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);
	schedule_work(&xfrm_state_gc_work);
}
200
/* Unlink @x from the hash tables and drop the references they held.
 * Caller must hold x->lock and its own reference.  Idempotent: a state
 * already marked DEAD is left untouched. */
static void __xfrm_state_delete(struct xfrm_state *x)
{
	if (x->km.state != XFRM_STATE_DEAD) {
		x->km.state = XFRM_STATE_DEAD;
		spin_lock(&xfrm_state_lock);
		list_del(&x->bydst);
		atomic_dec(&x->refcnt);
		if (x->id.spi) {
			/* Only states with an SPI sit on the byspi table. */
			list_del(&x->byspi);
			atomic_dec(&x->refcnt);
		}
		spin_unlock(&xfrm_state_lock);
		/* A pending timer also holds a reference; release it. */
		if (del_timer(&x->timer))
			atomic_dec(&x->refcnt);

		/* The number two in this test is the reference
		 * mentioned in the comment below plus the reference
		 * our caller holds.  A larger value means that
		 * there are DSTs attached to this xfrm_state.
		 */
		if (atomic_read(&x->refcnt) > 2)
			xfrm_flush_bundles();

		/* All xfrm_state objects are created by xfrm_state_alloc.
		 * The xfrm_state_alloc call gives a reference, and that
		 * is what we are dropping here.
		 */
		atomic_dec(&x->refcnt);
	}
}
231
/* Public deletion entry point: takes x->lock (bottom halves disabled)
 * around __xfrm_state_delete().  Caller must hold a reference on @x. */
void xfrm_state_delete(struct xfrm_state *x)
{
	spin_lock_bh(&x->lock);
	__xfrm_state_delete(x);
	spin_unlock_bh(&x->lock);
}
238
/* Delete every non-kernel-owned state matching @proto (IPSEC_PROTO_ANY
 * matches all).  xfrm_state_delete() sleeps on other locks, so the hash
 * lock is dropped around each deletion and the chain walk is restarted
 * from the top of the bucket afterwards. */
void xfrm_state_flush(u8 proto)
{
	int i;
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i < XFRM_DST_HSIZE; i++) {
restart:
		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
			if (!xfrm_state_kern(x) &&
			    (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
				/* Pin x across the unlocked window. */
				xfrm_state_hold(x);
				spin_unlock_bh(&xfrm_state_lock);

				xfrm_state_delete(x);
				xfrm_state_put(x);

				spin_lock_bh(&xfrm_state_lock);
				goto restart;
			}
		}
	}
	spin_unlock_bh(&xfrm_state_lock);
	wake_up(&km_waitq);
}
264
/* Fill in the temporary selector of a larval state @x from the flow and
 * template, delegating to the address-family-specific helper.  Returns
 * 0 on success, -1 if the family has no registered afinfo. */
static int
xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
		  struct xfrm_tmpl *tmpl,
		  xfrm_address_t *daddr, xfrm_address_t *saddr,
		  unsigned short family)
{
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return -1;
	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
	xfrm_state_put_afinfo(afinfo);
	return 0;
}
278
/* Find (or start acquiring) a state matching the template/flow for
 * output.  On success returns a referenced state.  On failure returns
 * NULL and sets *err: -EAGAIN if an ACQUIRE is already in flight,
 * -ESRCH if only invalid matches exist, -ENOMEM otherwise. */
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
		struct flowi *fl, struct xfrm_tmpl *tmpl,
		struct xfrm_policy *pol, int *err,
		unsigned short family)
{
	unsigned h = xfrm_dst_hash(daddr, family);
	struct xfrm_state *x;
	int acquire_in_progress = 0;
	int error = 0;
	struct xfrm_state *best = NULL;	/* best VALID candidate so far */

	spin_lock_bh(&xfrm_state_lock);
	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
		if (x->props.family == family &&
		    x->props.reqid == tmpl->reqid &&
		    xfrm_state_addr_check(x, daddr, saddr, family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto) {
			/* Resolution logic:
			   1. There is a valid state with matching selector.
			      Done.
			   2. Valid state with inappropriate selector. Skip.

			   Entering area of "sysdeps".

			   3. If state is not valid, selector is temporary,
			      it selects only session which triggered
			      previous resolution. Key manager will do
			      something to install a state with proper
			      selector.
			 */
			if (x->km.state == XFRM_STATE_VALID) {
				if (!xfrm_selector_match(&x->sel, fl, family))
					continue;
				/* Prefer non-dying states; among equals,
				 * prefer the most recently added. */
				if (!best ||
				    best->km.dying > x->km.dying ||
				    (best->km.dying == x->km.dying &&
				     best->curlft.add_time < x->curlft.add_time))
					best = x;
			} else if (x->km.state == XFRM_STATE_ACQ) {
				acquire_in_progress = 1;
			} else if (x->km.state == XFRM_STATE_ERROR ||
				   x->km.state == XFRM_STATE_EXPIRED) {
				if (xfrm_selector_match(&x->sel, fl, family))
					error = 1;
			}
		}
	}

	x = best;
	if (!x && !error && !acquire_in_progress &&
	    ((x = xfrm_state_alloc()) != NULL)) {
		/* Initialize temporary selector matching only
		 * to current session. */
		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);

		if (km_query(x, tmpl, pol) == 0) {
			/* Key manager accepted the ACQUIRE: publish the
			 * larval state and arm its expiry timer. */
			x->km.state = XFRM_STATE_ACQ;
			list_add_tail(&x->bydst, xfrm_state_bydst+h);
			xfrm_state_hold(x);
			if (x->id.spi) {
				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
				list_add(&x->byspi, xfrm_state_byspi+h);
				xfrm_state_hold(x);
			}
			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
			xfrm_state_hold(x);
			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
			add_timer(&x->timer);
		} else {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			error = 1;
		}
	}
	if (x)
		xfrm_state_hold(x);
	else
		*err = acquire_in_progress ? -EAGAIN :
			(error ? -ESRCH : -ENOMEM);
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
364
/* Link @x into both hash tables (each link takes a reference) and arm
 * its timer for a first lifetime check in one second.  Caller must hold
 * xfrm_state_lock. */
static void __xfrm_state_insert(struct xfrm_state *x)
{
	unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);

	list_add(&x->bydst, xfrm_state_bydst+h);
	xfrm_state_hold(x);

	h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);

	list_add(&x->byspi, xfrm_state_byspi+h);
	xfrm_state_hold(x);

	/* Timer holds a reference too while pending. */
	if (!mod_timer(&x->timer, jiffies + HZ))
		xfrm_state_hold(x);

	wake_up(&km_waitq);
}
382
/* Locked wrapper around __xfrm_state_insert() for external callers. */
void xfrm_state_insert(struct xfrm_state *x)
{
	spin_lock_bh(&xfrm_state_lock);
	__xfrm_state_insert(x);
	spin_unlock_bh(&xfrm_state_lock);
}
389
/* Add a fully-specified state.  Fails with -EEXIST if a state with the
 * same (daddr, spi, proto) already exists.  Any matching larval ACQ
 * state (found via find_acq with create=0) is deleted after the new
 * state is inserted, outside the hash lock. */
int xfrm_state_add(struct xfrm_state *x)
{
	struct xfrm_state_afinfo *afinfo;
	struct xfrm_state *x1;
	int err;

	afinfo = xfrm_state_get_afinfo(x->props.family);
	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_state_lock);

	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
	if (x1) {
		xfrm_state_put(x1);
		x1 = NULL;
		err = -EEXIST;
		goto out;
	}

	/* x1, if any, is the larval state this one supersedes. */
	x1 = afinfo->find_acq(
		x->props.mode, x->props.reqid, x->id.proto,
		&x->id.daddr, &x->props.saddr, 0);

	__xfrm_state_insert(x);
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);

	if (x1) {
		/* Deletion may sleep on other locks; do it unlocked. */
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
	}

	return err;
}
428
/* Update an existing state with the parameters of @x.  If the existing
 * state is larval (ACQ), @x simply replaces it.  Otherwise only the
 * encap template and lifetimes of the existing VALID state are updated.
 * Returns -ESRCH if no match, -EEXIST for kernel-owned states. */
int xfrm_state_update(struct xfrm_state *x)
{
	struct xfrm_state_afinfo *afinfo;
	struct xfrm_state *x1;
	int err;

	afinfo = xfrm_state_get_afinfo(x->props.family);
	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_state_lock);
	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);

	err = -ESRCH;
	if (!x1)
		goto out;

	if (xfrm_state_kern(x1)) {
		xfrm_state_put(x1);
		err = -EEXIST;
		goto out;
	}

	if (x1->km.state == XFRM_STATE_ACQ) {
		/* Replace the larval state outright; x = NULL signals
		 * the "replaced" path below. */
		__xfrm_state_insert(x);
		x = NULL;
	}
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);

	if (err)
		return err;

	if (!x) {
		/* New state inserted above; retire the larval one. */
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
		return 0;
	}

	err = -EINVAL;
	spin_lock_bh(&x1->lock);
	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		if (x->encap && x1->encap)
			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
		x1->km.dying = 0;

		/* Re-arm the lifetime timer against the new limits. */
		if (!mod_timer(&x1->timer, jiffies + HZ))
			xfrm_state_hold(x1);
		if (x1->curlft.use_time)
			xfrm_state_check_expire(x1);

		err = 0;
	}
	spin_unlock_bh(&x1->lock);

	xfrm_state_put(x1);

	return err;
}
492
/* Account a use of @x and enforce byte/packet lifetime limits.  Stamps
 * use_time on first use.  Hard-limit violation notifies the key manager
 * and schedules deletion via the timer; soft-limit violation sends a
 * one-shot warning.  Returns 0 if the state is still usable. */
int xfrm_state_check_expire(struct xfrm_state *x)
{
	if (!x->curlft.use_time)
		x->curlft.use_time = (unsigned long)xtime.tv_sec;

	if (x->km.state != XFRM_STATE_VALID)
		return -EINVAL;

	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
	    x->curlft.packets >= x->lft.hard_packet_limit) {
		km_state_expired(x, 1);
		/* Give the km a grace period before the timer kills it. */
		if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ))
			xfrm_state_hold(x);
		return -EINVAL;
	}

	if (!x->km.dying &&
	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
	     x->curlft.packets >= x->lft.soft_packet_limit))
		km_state_expired(x, 0);
	return 0;
}
515
/* Ensure @skb has enough headroom for this transform's header plus the
 * output device's link-layer reserve, expanding the head if needed.
 * Returns 0 or the pskb_expand_head() error. */
int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
{
	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
		- skb_headroom(skb);

	if (nhead > 0)
		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);

	/* Check tail too... */
	return 0;
}
527
/* Run lifetime accounting for @x, then make sure @skb has the headroom
 * this transform needs.  Returns 0 on success or a negative errno from
 * whichever check failed first. */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int ret;

	ret = xfrm_state_check_expire(x);
	if (ret >= 0)
		ret = xfrm_state_check_space(x, skb);
	return ret;
}
537
/* Look up a state by (daddr, spi, proto) through the family-specific
 * helper.  Returns a referenced state or NULL (also NULL when the
 * family has no registered afinfo). */
struct xfrm_state *
xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
		  unsigned short family)
{
	struct xfrm_state *x;
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return NULL;

	spin_lock_bh(&xfrm_state_lock);
	x = afinfo->state_lookup(daddr, spi, proto);
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);
	return x;
}
553
/* Find a larval (ACQUIRE) state matching the given parameters; when
 * @create is nonzero the family helper may allocate one.  Returns a
 * referenced state or NULL. */
struct xfrm_state *
xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
	      xfrm_address_t *daddr, xfrm_address_t *saddr, 
	      int create, unsigned short family)
{
	struct xfrm_state *x;
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return NULL;

	spin_lock_bh(&xfrm_state_lock);
	x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);
	return x;
}
570
571 /* Silly enough, but I'm lazy to build resolution list */
572
/* Linear scan of every hash bucket for a state whose km.seq matches
 * @seq (used to pair ACQUIRE replies with their larval state).
 * Returns a referenced state or NULL. */
struct xfrm_state * xfrm_find_acq_byseq(u32 seq)
{
	int i;
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i < XFRM_DST_HSIZE; i++) {
		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
			if (x->km.seq == seq) {
				xfrm_state_hold(x);
				spin_unlock_bh(&xfrm_state_lock);
				return x;
			}
		}
	}
	spin_unlock_bh(&xfrm_state_lock);
	return NULL;
}
591  
/* Return the next nonzero ACQUIRE sequence number.  A private spinlock
 * guards the static counter; the double-increment expression skips 0
 * on wraparound so 0 can serve as "no sequence". */
u32 xfrm_get_acqseq(void)
{
	u32 res;
	static u32 acqseq;
	static spinlock_t acqseq_lock = SPIN_LOCK_UNLOCKED;

	spin_lock_bh(&acqseq_lock);
	res = (++acqseq ? : ++acqseq);
	spin_unlock_bh(&acqseq_lock);
	return res;
}
603
/* Assign an SPI to @x from [minspi, maxspi] (network byte order) and,
 * on success, link it into the byspi hash.  minspi == maxspi requests
 * that exact value; otherwise random probing is tried, at most one
 * attempt per value in the range.  No-op if @x already has an SPI;
 * x->id.spi stays 0 if no free value was found. */
void
xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
{
	u32 h;
	struct xfrm_state *x0;

	if (x->id.spi)
		return;

	if (minspi == maxspi) {
		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
		if (x0) {
			/* Requested SPI already in use. */
			xfrm_state_put(x0);
			return;
		}
		x->id.spi = minspi;
	} else {
		u32 spi = 0;
		/* Work in host byte order for the arithmetic. */
		minspi = ntohl(minspi);
		maxspi = ntohl(maxspi);
		for (h=0; h<maxspi-minspi+1; h++) {
			spi = minspi + net_random()%(maxspi-minspi+1);
			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
			if (x0 == NULL) {
				x->id.spi = htonl(spi);
				break;
			}
			xfrm_state_put(x0);
		}
	}
	if (x->id.spi) {
		spin_lock_bh(&xfrm_state_lock);
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
		list_add(&x->byspi, xfrm_state_byspi+h);
		xfrm_state_hold(x);
		spin_unlock_bh(&xfrm_state_lock);
		wake_up(&km_waitq);
	}
}
643
/* Invoke @func on every state matching @proto.  A first pass counts the
 * matches (both passes run under one lock hold, so the count stays
 * consistent); @func receives the remaining-count, counting down to 0
 * for the last entry.  Returns -ENOENT if nothing matched, otherwise
 * the first nonzero value from @func (which aborts the walk), or 0. */
int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
		    void *data)
{
	int i;
	struct xfrm_state *x;
	int count = 0;
	int err = 0;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i < XFRM_DST_HSIZE; i++) {
		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
			if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
				count++;
		}
	}
	if (count == 0) {
		err = -ENOENT;
		goto out;
	}

	for (i = 0; i < XFRM_DST_HSIZE; i++) {
		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
			if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
				continue;
			err = func(x, --count, data);
			if (err)
				goto out;
		}
	}
out:
	spin_unlock_bh(&xfrm_state_lock);
	return err;
}
677
678
/* Anti-replay check for inbound sequence number @seq (network order).
 * Accepts any sequence ahead of the highest seen; older ones must fall
 * inside the replay window and not be marked in the bitmap.  Returns 0
 * if acceptable, -EINVAL (with stats bumped) otherwise.
 * NOTE(review): the bitmap is a 32-bit word, so this assumes
 * props.replay_window <= 32 — confirm against the configuration path. */
int xfrm_replay_check(struct xfrm_state *x, u32 seq)
{
	u32 diff;

	seq = ntohl(seq);

	/* Sequence number 0 is never valid for ESP/AH. */
	if (unlikely(seq == 0))
		return -EINVAL;

	if (likely(seq > x->replay.seq))
		return 0;

	diff = x->replay.seq - seq;
	if (diff >= x->props.replay_window) {
		x->stats.replay_window++;
		return -EINVAL;
	}

	if (x->replay.bitmap & (1U << diff)) {
		/* Already seen: genuine replay. */
		x->stats.replay++;
		return -EINVAL;
	}
	return 0;
}
703
/* Record @seq (network order) as received in the replay state.  A newer
 * sequence shifts the window forward (or resets the bitmap if the jump
 * exceeds the window); an older in-window one just sets its bit.
 * Caller is expected to have validated @seq via xfrm_replay_check(). */
void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
{
	u32 diff;

	seq = ntohl(seq);

	if (seq > x->replay.seq) {
		diff = seq - x->replay.seq;
		if (diff < x->props.replay_window)
			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
		else
			x->replay.bitmap = 1;
		x->replay.seq = seq;
	} else {
		diff = x->replay.seq - seq;
		x->replay.bitmap |= (1U << diff);
	}
}
722
723 int xfrm_check_selectors(struct xfrm_state **x, int n, struct flowi *fl)
724 {
725         int i;
726
727         for (i=0; i<n; i++) {
728                 int match;
729                 match = xfrm_selector_match(&x[i]->sel, fl, x[i]->props.family);
730                 if (!match)
731                         return -EINVAL;
732         }
733         return 0;
734 }
735
/* Registered key managers (pfkey, netlink); xfrm_km_lock guards the list. */
static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
static rwlock_t         xfrm_km_lock = RW_LOCK_UNLOCKED;
738
/* Notify every registered key manager that @x expired.  @hard nonzero
 * marks a hard expiry (state becomes EXPIRED and km_waitq is woken);
 * otherwise only the dying flag is set as a soft warning. */
void km_state_expired(struct xfrm_state *x, int hard)
{
	struct xfrm_mgr *km;

	if (hard)
		x->km.state = XFRM_STATE_EXPIRED;
	else
		x->km.dying = 1;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list)
		km->notify(x, hard);
	read_unlock(&xfrm_km_lock);

	if (hard)
		wake_up(&km_waitq);
}
756
/* Send an ACQUIRE for larval state @x to the key managers, stopping at
 * the first one that accepts it.  Returns 0 on acceptance, otherwise
 * the last manager's error (-EINVAL if none are registered). */
int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
{
	int err = -EINVAL;
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		err = km->acquire(x, t, pol, XFRM_POLICY_OUT);
		if (!err)
			break;
	}
	read_unlock(&xfrm_km_lock);
	return err;
}
771
/* Report a NAT-T address/port change on @x to the key managers.  Only
 * managers implementing new_mapping are consulted; the first success
 * ends the loop.  Returns 0 or the last error (-EINVAL if no manager
 * handled it). */
int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
{
	int err = -EINVAL;
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		if (km->new_mapping)
			err = km->new_mapping(x, ipaddr, sport);
		if (!err)
			break;
	}
	read_unlock(&xfrm_km_lock);
	return err;
}
787
/* Notify key managers that policy @pol (direction @dir) expired; on a
 * hard expiry also wake km_waitq so waiters can re-evaluate. */
void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
{
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list)
		if (km->notify_policy)
			km->notify_policy(pol, dir, hard);
	read_unlock(&xfrm_km_lock);

	if (hard)
		wake_up(&km_waitq);
}
801
/* setsockopt() path: copy a user-supplied per-socket policy blob, let
 * the first key manager that understands it compile a policy, and
 * attach it to @sk.  Returns 0 on success or a negative errno.
 * NOTE(review): on success compile_policy stores a non-negative value
 * in err which is passed as the direction to xfrm_sk_policy_insert —
 * presumably the policy direction; confirm against the km backends. */
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
	int err;
	u8 *data;
	struct xfrm_mgr *km;
	struct xfrm_policy *pol = NULL;

	/* Bound the copy to one page to cap kernel allocation. */
	if (optlen <= 0 || optlen > PAGE_SIZE)
		return -EMSGSIZE;

	data = kmalloc(optlen, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	err = -EFAULT;
	if (copy_from_user(data, optval, optlen))
		goto out;

	err = -EINVAL;
	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		pol = km->compile_policy(sk->sk_family, optname, data,
					 optlen, &err);
		if (err >= 0)
			break;
	}
	read_unlock(&xfrm_km_lock);

	if (err >= 0) {
		xfrm_sk_policy_insert(sk, err, pol);
		xfrm_pol_put(pol);
		err = 0;
	}

out:
	kfree(data);
	return err;
}
840
/* Register a key manager; it will receive all state/policy events. */
int xfrm_register_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_add_tail(&km->list, &xfrm_km_list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
848
/* Unregister a previously registered key manager. */
int xfrm_unregister_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_del(&km->list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
856
/* Register address-family-specific state operations.  Hands the family
 * pointers to the shared hash tables.  Fails with -ENOBUFS if the slot
 * is already taken. */
int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock(&xfrm_state_afinfo_lock);
	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
		err = -ENOBUFS;
	else {
		afinfo->state_bydst = xfrm_state_bydst;
		afinfo->state_byspi = xfrm_state_byspi;
		xfrm_state_afinfo[afinfo->family] = afinfo;
	}
	write_unlock(&xfrm_state_afinfo_lock);
	return err;
}
/* Unregister family-specific state operations; the slot must currently
 * hold exactly @afinfo, otherwise -EINVAL. */
int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock(&xfrm_state_afinfo_lock);
	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else {
			xfrm_state_afinfo[afinfo->family] = NULL;
			afinfo->state_byspi = NULL;
			afinfo->state_bydst = NULL;
		}
	}
	write_unlock(&xfrm_state_afinfo_lock);
	return err;
}
896
/* Look up the afinfo for @family and return it with afinfo->lock held
 * for reading (released by xfrm_state_put_afinfo).  The per-afinfo lock
 * is taken before the table lock is dropped so the entry cannot vanish.
 * Returns NULL for unknown/unregistered families. */
struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
{
	struct xfrm_state_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_state_afinfo_lock);
	afinfo = xfrm_state_afinfo[family];
	if (likely(afinfo != NULL))
		read_lock(&afinfo->lock);
	read_unlock(&xfrm_state_afinfo_lock);
	return afinfo;
}
909
910 void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
911 {
912         if (unlikely(afinfo == NULL))
913                 return;
914         read_unlock(&afinfo->lock);
915 }
916
/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
/* Detach @x from its tunnel state, deleting the tunnel when @x holds
 * the last external tunnel_users reference (count of 2: the tunnel's
 * own plus ours). */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
	if (x->tunnel) {
		struct xfrm_state *t = x->tunnel;

		if (atomic_read(&t->tunnel_users) == 2)
			xfrm_state_delete(t);
		atomic_dec(&t->tunnel_users);
		xfrm_state_put(t);
		x->tunnel = NULL;
	}
}
930
/* Boot-time initialization: set up both hash tables and the deferred
 * garbage-collection work item. */
void __init xfrm_state_init(void)
{
	int i;

	for (i=0; i<XFRM_DST_HSIZE; i++) {
		INIT_LIST_HEAD(&xfrm_state_bydst[i]);
		INIT_LIST_HEAD(&xfrm_state_byspi[i]);
	}
	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
}
941