Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
[linux-2.6.git] / net / xfrm / xfrm_state.c
index dc70df7..93a2f36 100644 (file)
  *             Split up af-specific functions
  *     Derek Atkins <derek@ihtfp.com>
  *             Add UDP Encapsulation
- *     
+ *
  */
 
 #include <linux/workqueue.h>
 #include <net/xfrm.h>
 #include <linux/pfkeyv2.h>
 #include <linux/ipsec.h>
+#include <linux/module.h>
 #include <asm/uaccess.h>
 
+struct sock *xfrm_nl;
+EXPORT_SYMBOL(xfrm_nl);
+
+u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
+EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
+
+u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
+EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
+
 /* Each xfrm_state may be linked to two tables:
 
    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
@@ -26,7 +36,7 @@
       destination/tunnel endpoint. (output)
  */
 
-static spinlock_t xfrm_state_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(xfrm_state_lock);
 
 /* Hash table to find appropriate SA towards given target (endpoint
  * of tunnel or destination of transport mode) allowed by selector.
@@ -38,34 +48,41 @@ static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
 static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
 
 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
+EXPORT_SYMBOL(km_waitq);
 
-static rwlock_t xfrm_state_afinfo_lock = RW_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
 
 static struct work_struct xfrm_state_gc_work;
 static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
-static spinlock_t xfrm_state_gc_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(xfrm_state_gc_lock);
+
+static int xfrm_state_gc_flush_bundles;
+
+int __xfrm_state_delete(struct xfrm_state *x);
 
-static void __xfrm_state_delete(struct xfrm_state *x);
+static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
+static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
+
+int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
+void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
 
 static void xfrm_state_gc_destroy(struct xfrm_state *x)
 {
        if (del_timer(&x->timer))
                BUG();
-       if (x->aalg)
-               kfree(x->aalg);
-       if (x->ealg)
-               kfree(x->ealg);
-       if (x->calg)
-               kfree(x->calg);
-       if (x->encap)
-               kfree(x->encap);
+       if (del_timer(&x->rtimer))
+               BUG();
+       kfree(x->aalg);
+       kfree(x->ealg);
+       kfree(x->calg);
+       kfree(x->encap);
        if (x->type) {
                x->type->destructor(x);
                xfrm_put_type(x->type);
        }
+       security_xfrm_state_free(x);
        kfree(x);
-       wake_up(&km_waitq);
 }
 
 static void xfrm_state_gc_task(void *data)
@@ -74,6 +91,11 @@ static void xfrm_state_gc_task(void *data)
        struct list_head *entry, *tmp;
        struct list_head gc_list = LIST_HEAD_INIT(gc_list);
 
+       if (xfrm_state_gc_flush_bundles) {
+               xfrm_state_gc_flush_bundles = 0;
+               xfrm_flush_bundles();
+       }
+
        spin_lock_bh(&xfrm_state_gc_lock);
        list_splice_init(&xfrm_state_gc_list, &gc_list);
        spin_unlock_bh(&xfrm_state_gc_lock);
@@ -82,6 +104,7 @@ static void xfrm_state_gc_task(void *data)
                x = list_entry(entry, struct xfrm_state, bydst);
                xfrm_state_gc_destroy(x);
        }
+       wake_up(&km_waitq);
 }
 
 static inline unsigned long make_jiffies(long secs)
@@ -139,8 +162,9 @@ static void xfrm_timer_handler(unsigned long data)
                        next = tmo;
        }
 
+       x->km.dying = warn;
        if (warn)
-               km_state_expired(x, 0);
+               km_state_expired(x, 0, 0);
 resched:
        if (next != LONG_MAX &&
            !mod_timer(&x->timer, jiffies + make_jiffies(next)))
@@ -154,15 +178,16 @@ expired:
                next = 2;
                goto resched;
        }
-       if (x->id.spi != 0)
-               km_state_expired(x, 1);
-       __xfrm_state_delete(x);
+       if (!__xfrm_state_delete(x) && x->id.spi)
+               km_state_expired(x, 1, 0);
 
 out:
        spin_unlock(&x->lock);
        xfrm_state_put(x);
 }
 
+static void xfrm_replay_timer_handler(unsigned long data);
+
 struct xfrm_state *xfrm_state_alloc(void)
 {
        struct xfrm_state *x;
@@ -178,15 +203,21 @@ struct xfrm_state *xfrm_state_alloc(void)
                init_timer(&x->timer);
                x->timer.function = xfrm_timer_handler;
                x->timer.data     = (unsigned long)x;
+               init_timer(&x->rtimer);
+               x->rtimer.function = xfrm_replay_timer_handler;
+               x->rtimer.data     = (unsigned long)x;
                x->curlft.add_time = (unsigned long)xtime.tv_sec;
                x->lft.soft_byte_limit = XFRM_INF;
                x->lft.soft_packet_limit = XFRM_INF;
                x->lft.hard_byte_limit = XFRM_INF;
                x->lft.hard_packet_limit = XFRM_INF;
-               x->lock = SPIN_LOCK_UNLOCKED;
+               x->replay_maxage = 0;
+               x->replay_maxdiff = 0;
+               spin_lock_init(&x->lock);
        }
        return x;
 }
+EXPORT_SYMBOL(xfrm_state_alloc);
 
 void __xfrm_state_destroy(struct xfrm_state *x)
 {
@@ -197,49 +228,60 @@ void __xfrm_state_destroy(struct xfrm_state *x)
        spin_unlock_bh(&xfrm_state_gc_lock);
        schedule_work(&xfrm_state_gc_work);
 }
+EXPORT_SYMBOL(__xfrm_state_destroy);
 
-static void __xfrm_state_delete(struct xfrm_state *x)
+int __xfrm_state_delete(struct xfrm_state *x)
 {
+       int err = -ESRCH;
+
        if (x->km.state != XFRM_STATE_DEAD) {
                x->km.state = XFRM_STATE_DEAD;
                spin_lock(&xfrm_state_lock);
                list_del(&x->bydst);
-               atomic_dec(&x->refcnt);
+               __xfrm_state_put(x);
                if (x->id.spi) {
                        list_del(&x->byspi);
-                       atomic_dec(&x->refcnt);
+                       __xfrm_state_put(x);
                }
                spin_unlock(&xfrm_state_lock);
                if (del_timer(&x->timer))
-                       atomic_dec(&x->refcnt);
+                       __xfrm_state_put(x);
+               if (del_timer(&x->rtimer))
+                       __xfrm_state_put(x);
 
                /* The number two in this test is the reference
                 * mentioned in the comment below plus the reference
                 * our caller holds.  A larger value means that
                 * there are DSTs attached to this xfrm_state.
                 */
-               if (atomic_read(&x->refcnt) > 2)
-                       xfrm_flush_bundles();
-
-               /* All xfrm_state objects are created by one of two possible
-                * paths:
-                *
-                * 2) xfrm_state_lookup --> xfrm_state_insert
-                *
-                * The xfrm_state_lookup or xfrm_state_alloc call gives a
-                * reference, and that is what we are dropping here.
+               if (atomic_read(&x->refcnt) > 2) {
+                       xfrm_state_gc_flush_bundles = 1;
+                       schedule_work(&xfrm_state_gc_work);
+               }
+
+               /* All xfrm_state objects are created by xfrm_state_alloc.
+                * The xfrm_state_alloc call gives a reference, and that
+                * is what we are dropping here.
                 */
-               atomic_dec(&x->refcnt);
+               __xfrm_state_put(x);
+               err = 0;
        }
+
+       return err;
 }
+EXPORT_SYMBOL(__xfrm_state_delete);
 
-void xfrm_state_delete(struct xfrm_state *x)
+int xfrm_state_delete(struct xfrm_state *x)
 {
-       xfrm_state_delete_tunnel(x);
+       int err;
+
        spin_lock_bh(&x->lock);
-       __xfrm_state_delete(x);
+       err = __xfrm_state_delete(x);
        spin_unlock_bh(&x->lock);
+
+       return err;
 }
+EXPORT_SYMBOL(xfrm_state_delete);
 
 void xfrm_state_flush(u8 proto)
 {
@@ -266,6 +308,7 @@ restart:
        spin_unlock_bh(&xfrm_state_lock);
        wake_up(&km_waitq);
 }
+EXPORT_SYMBOL(xfrm_state_flush);
 
 static int
 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
@@ -288,10 +331,17 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
                unsigned short family)
 {
        unsigned h = xfrm_dst_hash(daddr, family);
-       struct xfrm_state *x;
+       struct xfrm_state *x, *x0;
        int acquire_in_progress = 0;
        int error = 0;
        struct xfrm_state *best = NULL;
+       struct xfrm_state_afinfo *afinfo;
+       
+       afinfo = xfrm_state_get_afinfo(family);
+       if (afinfo == NULL) {
+               *err = -EAFNOSUPPORT;
+               return NULL;
+       }
 
        spin_lock_bh(&xfrm_state_lock);
        list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
@@ -299,7 +349,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
                    x->props.reqid == tmpl->reqid &&
                    xfrm_state_addr_check(x, daddr, saddr, family) &&
                    tmpl->mode == x->props.mode &&
-                   tmpl->id.proto == x->id.proto) {
+                   tmpl->id.proto == x->id.proto &&
+                   (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
                        /* Resolution logic:
                           1. There is a valid state with matching selector.
                              Done.
@@ -314,7 +365,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
                              selector.
                         */
                        if (x->km.state == XFRM_STATE_VALID) {
-                               if (!xfrm_selector_match(&x->sel, fl, family))
+                               if (!xfrm_selector_match(&x->sel, fl, family) ||
+                                   !xfrm_sec_ctx_match(pol->security, x->security))
                                        continue;
                                if (!best ||
                                    best->km.dying > x->km.dying ||
@@ -325,21 +377,27 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
                                acquire_in_progress = 1;
                        } else if (x->km.state == XFRM_STATE_ERROR ||
                                   x->km.state == XFRM_STATE_EXPIRED) {
-                               if (xfrm_selector_match(&x->sel, fl, family))
-                                       error = 1;
+                               if (xfrm_selector_match(&x->sel, fl, family) &&
+                                   xfrm_sec_ctx_match(pol->security, x->security))
+                                       error = -ESRCH;
                        }
                }
        }
 
-       if (best) {
-               xfrm_state_hold(best);
-               spin_unlock_bh(&xfrm_state_lock);
-               return best;
-       }
-
-       x = NULL;
-       if (!error && !acquire_in_progress &&
-           ((x = xfrm_state_alloc()) != NULL)) {
+       x = best;
+       if (!x && !error && !acquire_in_progress) {
+               if (tmpl->id.spi &&
+                   (x0 = afinfo->state_lookup(daddr, tmpl->id.spi,
+                                              tmpl->id.proto)) != NULL) {
+                       xfrm_state_put(x0);
+                       error = -EEXIST;
+                       goto out;
+               }
+               x = xfrm_state_alloc();
+               if (x == NULL) {
+                       error = -ENOMEM;
+                       goto out;
+               }
                /* Initialize temporary selector matching only
                 * to current session. */
                xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
@@ -355,18 +413,22 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
                        }
                        x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
                        xfrm_state_hold(x);
-                       mod_timer(&x->timer, XFRM_ACQ_EXPIRES*HZ);
+                       x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
+                       add_timer(&x->timer);
                } else {
                        x->km.state = XFRM_STATE_DEAD;
                        xfrm_state_put(x);
                        x = NULL;
-                       error = 1;
+                       error = -ESRCH;
                }
        }
+out:
+       if (x)
+               xfrm_state_hold(x);
+       else
+               *err = acquire_in_progress ? -EAGAIN : error;
        spin_unlock_bh(&xfrm_state_lock);
-       if (!x)
-               *err = acquire_in_progress ? -EAGAIN :
-                       (error ? -ESRCH : -ENOMEM);
+       xfrm_state_put_afinfo(afinfo);
        return x;
 }
 
@@ -385,6 +447,10 @@ static void __xfrm_state_insert(struct xfrm_state *x)
        if (!mod_timer(&x->timer, jiffies + HZ))
                xfrm_state_hold(x);
 
+       if (x->replay_maxage &&
+           !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
+               xfrm_state_hold(x);
+
        wake_up(&km_waitq);
 }
 
@@ -393,38 +459,48 @@ void xfrm_state_insert(struct xfrm_state *x)
        spin_lock_bh(&xfrm_state_lock);
        __xfrm_state_insert(x);
        spin_unlock_bh(&xfrm_state_lock);
+
+       xfrm_flush_all_bundles();
 }
+EXPORT_SYMBOL(xfrm_state_insert);
+
+static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
 
 int xfrm_state_add(struct xfrm_state *x)
 {
        struct xfrm_state_afinfo *afinfo;
        struct xfrm_state *x1;
+       int family;
        int err;
 
-       afinfo = xfrm_state_get_afinfo(x->props.family);
+       family = x->props.family;
+       afinfo = xfrm_state_get_afinfo(family);
        if (unlikely(afinfo == NULL))
                return -EAFNOSUPPORT;
 
        spin_lock_bh(&xfrm_state_lock);
 
        x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
-       if (!x1) {
-               x1 = afinfo->find_acq(
-                       x->props.mode, x->props.reqid, x->id.proto,
-                       &x->id.daddr, &x->props.saddr, 0);
-               if (x1 && x1->id.spi != x->id.spi && x1->id.spi) {
-                       xfrm_state_put(x1);
-                       x1 = NULL;
-               }
-       }
-
-       if (x1 && x1->id.spi) {
+       if (x1) {
                xfrm_state_put(x1);
                x1 = NULL;
                err = -EEXIST;
                goto out;
        }
 
+       if (x->km.seq) {
+               x1 = __xfrm_find_acq_byseq(x->km.seq);
+               if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
+                       xfrm_state_put(x1);
+                       x1 = NULL;
+               }
+       }
+
+       if (!x1)
+               x1 = afinfo->find_acq(
+                       x->props.mode, x->props.reqid, x->id.proto,
+                       &x->id.daddr, &x->props.saddr, 0);
+
        __xfrm_state_insert(x);
        err = 0;
 
@@ -432,6 +508,9 @@ out:
        spin_unlock_bh(&xfrm_state_lock);
        xfrm_state_put_afinfo(afinfo);
 
+       if (!err)
+               xfrm_flush_all_bundles();
+
        if (x1) {
                xfrm_state_delete(x1);
                xfrm_state_put(x1);
@@ -439,6 +518,7 @@ out:
 
        return err;
 }
+EXPORT_SYMBOL(xfrm_state_add);
 
 int xfrm_state_update(struct xfrm_state *x)
 {
@@ -489,19 +569,21 @@ out:
                        memcpy(x1->encap, x->encap, sizeof(*x1->encap));
                memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
                x1->km.dying = 0;
+
+               if (!mod_timer(&x1->timer, jiffies + HZ))
+                       xfrm_state_hold(x1);
+               if (x1->curlft.use_time)
+                       xfrm_state_check_expire(x1);
+
                err = 0;
        }
        spin_unlock_bh(&x1->lock);
 
-       if (!mod_timer(&x1->timer, jiffies + HZ))
-               xfrm_state_hold(x1);
-       if (x1->curlft.use_time)
-               xfrm_state_check_expire(x1);
-
        xfrm_state_put(x1);
 
        return err;
 }
+EXPORT_SYMBOL(xfrm_state_update);
 
 int xfrm_state_check_expire(struct xfrm_state *x)
 {
@@ -513,20 +595,23 @@ int xfrm_state_check_expire(struct xfrm_state *x)
 
        if (x->curlft.bytes >= x->lft.hard_byte_limit ||
            x->curlft.packets >= x->lft.hard_packet_limit) {
-               km_state_expired(x, 1);
-               if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ))
+               x->km.state = XFRM_STATE_EXPIRED;
+               if (!mod_timer(&x->timer, jiffies))
                        xfrm_state_hold(x);
                return -EINVAL;
        }
 
        if (!x->km.dying &&
            (x->curlft.bytes >= x->lft.soft_byte_limit ||
-            x->curlft.packets >= x->lft.soft_packet_limit))
-               km_state_expired(x, 0);
+            x->curlft.packets >= x->lft.soft_packet_limit)) {
+               x->km.dying = 1;
+               km_state_expired(x, 0, 0);
+       }
        return 0;
 }
+EXPORT_SYMBOL(xfrm_state_check_expire);
 
-int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
+static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
 {
        int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
                - skb_headroom(skb);
@@ -538,6 +623,17 @@ int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
        return 0;
 }
 
+int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
+{
+       int err = xfrm_state_check_expire(x);
+       if (err < 0)
+               goto err;
+       err = xfrm_state_check_space(x, skb);
+err:
+       return err;
+}
+EXPORT_SYMBOL(xfrm_state_check);
+
 struct xfrm_state *
 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
                  unsigned short family)
@@ -553,6 +649,7 @@ xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
        xfrm_state_put_afinfo(afinfo);
        return x;
 }
+EXPORT_SYMBOL(xfrm_state_lookup);
 
 struct xfrm_state *
 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
@@ -570,39 +667,49 @@ xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
        xfrm_state_put_afinfo(afinfo);
        return x;
 }
+EXPORT_SYMBOL(xfrm_find_acq);
 
 /* Silly enough, but I'm lazy to build resolution list */
 
-struct xfrm_state * xfrm_find_acq_byseq(u32 seq)
+static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
 {
        int i;
        struct xfrm_state *x;
 
-       spin_lock_bh(&xfrm_state_lock);
        for (i = 0; i < XFRM_DST_HSIZE; i++) {
                list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
-                       if (x->km.seq == seq) {
+                       if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) {
                                xfrm_state_hold(x);
-                               spin_unlock_bh(&xfrm_state_lock);
                                return x;
                        }
                }
        }
-       spin_unlock_bh(&xfrm_state_lock);
        return NULL;
 }
+
+struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
+{
+       struct xfrm_state *x;
+
+       spin_lock_bh(&xfrm_state_lock);
+       x = __xfrm_find_acq_byseq(seq);
+       spin_unlock_bh(&xfrm_state_lock);
+       return x;
+}
+EXPORT_SYMBOL(xfrm_find_acq_byseq);
+
 u32 xfrm_get_acqseq(void)
 {
        u32 res;
        static u32 acqseq;
-       static spinlock_t acqseq_lock = SPIN_LOCK_UNLOCKED;
+       static DEFINE_SPINLOCK(acqseq_lock);
 
        spin_lock_bh(&acqseq_lock);
        res = (++acqseq ? : ++acqseq);
        spin_unlock_bh(&acqseq_lock);
        return res;
 }
+EXPORT_SYMBOL(xfrm_get_acqseq);
 
 void
 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
@@ -627,11 +734,12 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
                for (h=0; h<maxspi-minspi+1; h++) {
                        spi = minspi + net_random()%(maxspi-minspi+1);
                        x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
-                       if (x0 == NULL)
+                       if (x0 == NULL) {
+                               x->id.spi = htonl(spi);
                                break;
+                       }
                        xfrm_state_put(x0);
                }
-               x->id.spi = htonl(spi);
        }
        if (x->id.spi) {
                spin_lock_bh(&xfrm_state_lock);
@@ -642,6 +750,7 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
                wake_up(&km_waitq);
        }
 }
+EXPORT_SYMBOL(xfrm_alloc_spi);
 
 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
                    void *data)
@@ -676,7 +785,75 @@ out:
        spin_unlock_bh(&xfrm_state_lock);
        return err;
 }
+EXPORT_SYMBOL(xfrm_state_walk);
+
+
+void xfrm_replay_notify(struct xfrm_state *x, int event)
+{
+       struct km_event c;
+       /* we send notify messages in case
+        *  1. we updated one of the sequence numbers, and the seqno difference
+        *     is at least x->replay_maxdiff, in this case we also update the
+        *     timeout of our timer function
+        *  2. if x->replay_maxage has elapsed since last update,
+        *     and there were changes
+        *
+        *  The state structure must be locked!
+        */
+
+       switch (event) {
+       case XFRM_REPLAY_UPDATE:
+               if (x->replay_maxdiff &&
+                   (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
+                   (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
+                       if (x->xflags & XFRM_TIME_DEFER)
+                               event = XFRM_REPLAY_TIMEOUT;
+                       else
+                               return;
+               }
 
+               break;
+
+       case XFRM_REPLAY_TIMEOUT:
+               if ((x->replay.seq == x->preplay.seq) &&
+                   (x->replay.bitmap == x->preplay.bitmap) &&
+                   (x->replay.oseq == x->preplay.oseq)) {
+                       x->xflags |= XFRM_TIME_DEFER;
+                       return;
+               }
+
+               break;
+       }
+
+       memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
+       c.event = XFRM_MSG_NEWAE;
+       c.data.aevent = event;
+       km_state_notify(x, &c);
+
+       if (x->replay_maxage &&
+           !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
+               xfrm_state_hold(x);
+               x->xflags &= ~XFRM_TIME_DEFER;
+       }
+}
+EXPORT_SYMBOL(xfrm_replay_notify);
+
+static void xfrm_replay_timer_handler(unsigned long data)
+{
+       struct xfrm_state *x = (struct xfrm_state*)data;
+
+       spin_lock(&x->lock);
+
+       if (x->km.state == XFRM_STATE_VALID) {
+               if (xfrm_aevent_is_on())
+                       xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
+               else
+                       x->xflags |= XFRM_TIME_DEFER;
+       }
+
+       spin_unlock(&x->lock);
+       xfrm_state_put(x);
+}
 
 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
 {
@@ -702,6 +879,7 @@ int xfrm_replay_check(struct xfrm_state *x, u32 seq)
        }
        return 0;
 }
+EXPORT_SYMBOL(xfrm_replay_check);
 
 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
 {
@@ -720,56 +898,72 @@ void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
                diff = x->replay.seq - seq;
                x->replay.bitmap |= (1U << diff);
        }
-}
 
-int xfrm_check_selectors(struct xfrm_state **x, int n, struct flowi *fl)
-{
-       int i;
-
-       for (i=0; i<n; i++) {
-               int match;
-               match = xfrm_selector_match(&x[i]->sel, fl, x[i]->props.family);
-               if (!match)
-                       return -EINVAL;
-       }
-       return 0;
+       if (xfrm_aevent_is_on())
+               xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
 }
+EXPORT_SYMBOL(xfrm_replay_advance);
 
 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
-static rwlock_t                xfrm_km_lock = RW_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(xfrm_km_lock);
 
-void km_state_expired(struct xfrm_state *x, int hard)
+void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
 {
        struct xfrm_mgr *km;
 
-       if (hard)
-               x->km.state = XFRM_STATE_EXPIRED;
-       else
-               x->km.dying = 1;
+       read_lock(&xfrm_km_lock);
+       list_for_each_entry(km, &xfrm_km_list, list)
+               if (km->notify_policy)
+                       km->notify_policy(xp, dir, c);
+       read_unlock(&xfrm_km_lock);
+}
 
+void km_state_notify(struct xfrm_state *x, struct km_event *c)
+{
+       struct xfrm_mgr *km;
        read_lock(&xfrm_km_lock);
        list_for_each_entry(km, &xfrm_km_list, list)
-               km->notify(x, hard);
+               if (km->notify)
+                       km->notify(x, c);
        read_unlock(&xfrm_km_lock);
+}
+
+EXPORT_SYMBOL(km_policy_notify);
+EXPORT_SYMBOL(km_state_notify);
+
+void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
+{
+       struct km_event c;
+
+       c.data.hard = hard;
+       c.pid = pid;
+       c.event = XFRM_MSG_EXPIRE;
+       km_state_notify(x, &c);
 
        if (hard)
                wake_up(&km_waitq);
 }
 
+EXPORT_SYMBOL(km_state_expired);
+/*
+ * The acquire is sent to every registered manager, regardless of failure.
+ * A single successful manager is enough for km_query() to return 0.
+*/
 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
 {
-       int err = -EINVAL;
+       int err = -EINVAL, acqret;
        struct xfrm_mgr *km;
 
        read_lock(&xfrm_km_lock);
        list_for_each_entry(km, &xfrm_km_list, list) {
-               err = km->acquire(x, t, pol, XFRM_POLICY_OUT);
-               if (!err)
-                       break;
+               acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
+               if (!acqret)
+                       err = acqret;
        }
        read_unlock(&xfrm_km_lock);
        return err;
 }
+EXPORT_SYMBOL(km_query);
 
 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
 {
@@ -786,22 +980,23 @@ int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
        read_unlock(&xfrm_km_lock);
        return err;
 }
+EXPORT_SYMBOL(km_new_mapping);
 
-void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
+void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
 {
-       struct xfrm_mgr *km;
+       struct km_event c;
 
-       read_lock(&xfrm_km_lock);
-       list_for_each_entry(km, &xfrm_km_list, list)
-               if (km->notify_policy)
-                       km->notify_policy(pol, dir, hard);
-       read_unlock(&xfrm_km_lock);
+       c.data.hard = hard;
+       c.pid = pid;
+       c.event = XFRM_MSG_POLEXPIRE;
+       km_policy_notify(pol, dir, &c);
 
        if (hard)
                wake_up(&km_waitq);
 }
+EXPORT_SYMBOL(km_policy_expired);
 
-int xfrm_user_policy(struct sock *sk, int optname, u8 *optval, int optlen)
+int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
 {
        int err;
        u8 *data;
@@ -839,6 +1034,7 @@ out:
        kfree(data);
        return err;
 }
+EXPORT_SYMBOL(xfrm_user_policy);
 
 int xfrm_register_km(struct xfrm_mgr *km)
 {
@@ -847,6 +1043,7 @@ int xfrm_register_km(struct xfrm_mgr *km)
        write_unlock_bh(&xfrm_km_lock);
        return 0;
 }
+EXPORT_SYMBOL(xfrm_register_km);
 
 int xfrm_unregister_km(struct xfrm_mgr *km)
 {
@@ -855,6 +1052,7 @@ int xfrm_unregister_km(struct xfrm_mgr *km)
        write_unlock_bh(&xfrm_km_lock);
        return 0;
 }
+EXPORT_SYMBOL(xfrm_unregister_km);
 
 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
 {
@@ -863,7 +1061,7 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
                return -EINVAL;
        if (unlikely(afinfo->family >= NPROTO))
                return -EAFNOSUPPORT;
-       write_lock(&xfrm_state_afinfo_lock);
+       write_lock_bh(&xfrm_state_afinfo_lock);
        if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
                err = -ENOBUFS;
        else {
@@ -871,9 +1069,10 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
                afinfo->state_byspi = xfrm_state_byspi;
                xfrm_state_afinfo[afinfo->family] = afinfo;
        }
-       write_unlock(&xfrm_state_afinfo_lock);
+       write_unlock_bh(&xfrm_state_afinfo_lock);
        return err;
 }
+EXPORT_SYMBOL(xfrm_state_register_afinfo);
 
 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
 {
@@ -882,7 +1081,7 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
                return -EINVAL;
        if (unlikely(afinfo->family >= NPROTO))
                return -EAFNOSUPPORT;
-       write_lock(&xfrm_state_afinfo_lock);
+       write_lock_bh(&xfrm_state_afinfo_lock);
        if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
                if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
                        err = -EINVAL;
@@ -892,11 +1091,12 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
                        afinfo->state_bydst = NULL;
                }
        }
-       write_unlock(&xfrm_state_afinfo_lock);
+       write_unlock_bh(&xfrm_state_afinfo_lock);
        return err;
 }
+EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
 
-struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
+static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
 {
        struct xfrm_state_afinfo *afinfo;
        if (unlikely(family >= NPROTO))
@@ -909,7 +1109,7 @@ struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
        return afinfo;
 }
 
-void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
+static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
 {
        if (unlikely(afinfo == NULL))
                return;
@@ -929,7 +1129,81 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x)
                x->tunnel = NULL;
        }
 }
+EXPORT_SYMBOL(xfrm_state_delete_tunnel);
+
+/*
+ * This function is NOT optimal.  For example, with ESP it will give an
+ * MTU that's usually two bytes short of being optimal.  However, it will
+ * usually give an answer that's a multiple of 4 provided the input is
+ * also a multiple of 4.
+ */
+int xfrm_state_mtu(struct xfrm_state *x, int mtu)
+{
+       int res = mtu;
+
+       res -= x->props.header_len;
+
+       for (;;) {
+               int m = res;
+
+               if (m < 68)
+                       return 68;
+
+               spin_lock_bh(&x->lock);
+               if (x->km.state == XFRM_STATE_VALID &&
+                   x->type && x->type->get_max_size)
+                       m = x->type->get_max_size(x, m);
+               else
+                       m += x->props.header_len;
+               spin_unlock_bh(&x->lock);
+
+               if (m <= mtu)
+                       break;
+               res -= (m - mtu);
+       }
+
+       return res;
+}
+
+EXPORT_SYMBOL(xfrm_state_mtu);
+
+int xfrm_init_state(struct xfrm_state *x)
+{
+       struct xfrm_state_afinfo *afinfo;
+       int family = x->props.family;
+       int err;
+
+       err = -EAFNOSUPPORT;
+       afinfo = xfrm_state_get_afinfo(family);
+       if (!afinfo)
+               goto error;
+
+       err = 0;
+       if (afinfo->init_flags)
+               err = afinfo->init_flags(x);
+
+       xfrm_state_put_afinfo(afinfo);
+
+       if (err)
+               goto error;
+
+       err = -EPROTONOSUPPORT;
+       x->type = xfrm_get_type(x->id.proto, family);
+       if (x->type == NULL)
+               goto error;
+
+       err = x->type->init_state(x);
+       if (err)
+               goto error;
+
+       x->km.state = XFRM_STATE_VALID;
+
+error:
+       return err;
+}
 
+EXPORT_SYMBOL(xfrm_init_state);
 void __init xfrm_state_init(void)
 {
        int i;