Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
[linux-2.6.git] / net / xfrm / xfrm_policy.c
index 450b0a6..b469c8b 100644 (file)
@@ -10,7 +10,7 @@
  *     YOSHIFUJI Hideaki
  *             Split up af-specific portion
  *     Derek Atkins <derek@ihtfp.com>          Add the post_input processor
- *     
+ *
  */
 
 #include <linux/config.h>
 #include <linux/workqueue.h>
 #include <linux/notifier.h>
 #include <linux/netdevice.h>
+#include <linux/netfilter.h>
 #include <linux/module.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
 
-DECLARE_MUTEX(xfrm_cfg_sem);
-EXPORT_SYMBOL(xfrm_cfg_sem);
+DEFINE_MUTEX(xfrm_cfg_mutex);
+EXPORT_SYMBOL(xfrm_cfg_mutex);
 
 static DEFINE_RWLOCK(xfrm_policy_lock);
 
@@ -36,7 +37,7 @@ EXPORT_SYMBOL(xfrm_policy_list);
 static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
 
-static kmem_cache_t *xfrm_dst_cache;
+static kmem_cache_t *xfrm_dst_cache __read_mostly;
 
 static struct work_struct xfrm_policy_gc_work;
 static struct list_head xfrm_policy_gc_list =
@@ -56,12 +57,12 @@ int xfrm_register_type(struct xfrm_type *type, unsigned short family)
                return -EAFNOSUPPORT;
        typemap = afinfo->type_map;
 
-       write_lock(&typemap->lock);
+       write_lock_bh(&typemap->lock);
        if (likely(typemap->map[type->proto] == NULL))
                typemap->map[type->proto] = type;
        else
                err = -EEXIST;
-       write_unlock(&typemap->lock);
+       write_unlock_bh(&typemap->lock);
        xfrm_policy_put_afinfo(afinfo);
        return err;
 }
@@ -77,12 +78,12 @@ int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
                return -EAFNOSUPPORT;
        typemap = afinfo->type_map;
 
-       write_lock(&typemap->lock);
+       write_lock_bh(&typemap->lock);
        if (unlikely(typemap->map[type->proto] != type))
                err = -ENOENT;
        else
                typemap->map[type->proto] = NULL;
-       write_unlock(&typemap->lock);
+       write_unlock_bh(&typemap->lock);
        xfrm_policy_put_afinfo(afinfo);
        return err;
 }
@@ -117,7 +118,6 @@ retry:
        xfrm_policy_put_afinfo(afinfo);
        return type;
 }
-EXPORT_SYMBOL(xfrm_get_type);
 
 int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, 
                    unsigned short family)
@@ -163,7 +163,7 @@ static void xfrm_policy_timer(unsigned long data)
        if (xp->dead)
                goto out;
 
-       dir = xp->index & 7;
+       dir = xfrm_policy_id2dir(xp->index);
 
        if (xp->lft.hard_add_expires_seconds) {
                long tmo = xp->lft.hard_add_expires_seconds +
@@ -203,7 +203,7 @@ static void xfrm_policy_timer(unsigned long data)
        }
 
        if (warn)
-               km_policy_expired(xp, dir, 0);
+               km_policy_expired(xp, dir, 0, 0);
        if (next != LONG_MAX &&
            !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
                xfrm_pol_hold(xp);
@@ -215,8 +215,8 @@ out:
 
 expired:
        read_unlock(&xp->lock);
-       km_policy_expired(xp, dir, 1);
-       xfrm_policy_delete(xp, dir);
+       if (!xfrm_policy_delete(xp, dir))
+               km_policy_expired(xp, dir, 1, 0);
        xfrm_pol_put(xp);
 }
 
@@ -225,7 +225,7 @@ expired:
  * SPD calls.
  */
 
-struct xfrm_policy *xfrm_policy_alloc(int gfp)
+struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
 {
        struct xfrm_policy *policy;
 
@@ -247,15 +247,14 @@ EXPORT_SYMBOL(xfrm_policy_alloc);
 
 void __xfrm_policy_destroy(struct xfrm_policy *policy)
 {
-       if (!policy->dead)
-               BUG();
+       BUG_ON(!policy->dead);
 
-       if (policy->bundles)
-               BUG();
+       BUG_ON(policy->bundles);
 
        if (del_timer(&policy->timer))
                BUG();
 
+       security_xfrm_policy_free(policy);
        kfree(policy);
 }
 EXPORT_SYMBOL(__xfrm_policy_destroy);
@@ -300,19 +299,23 @@ static void xfrm_policy_gc_task(void *data)
 
 static void xfrm_policy_kill(struct xfrm_policy *policy)
 {
-       write_lock_bh(&policy->lock);
-       if (policy->dead)
-               goto out;
+       int dead;
 
+       write_lock_bh(&policy->lock);
+       dead = policy->dead;
        policy->dead = 1;
+       write_unlock_bh(&policy->lock);
+
+       if (unlikely(dead)) {
+               WARN_ON(1);
+               return;
+       }
 
        spin_lock(&xfrm_policy_gc_lock);
        list_add(&policy->list, &xfrm_policy_gc_list);
        spin_unlock(&xfrm_policy_gc_lock);
-       schedule_work(&xfrm_policy_gc_work);
 
-out:
-       write_unlock_bh(&policy->lock);
+       schedule_work(&xfrm_policy_gc_work);
 }
 
 /* Generate new index... KAME seems to generate them ordered by cost
@@ -342,10 +345,12 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
        struct xfrm_policy *pol, **p;
        struct xfrm_policy *delpol = NULL;
        struct xfrm_policy **newpos = NULL;
+       struct dst_entry *gc_list;
 
        write_lock_bh(&xfrm_policy_lock);
        for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
-               if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) {
+               if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 &&
+                   xfrm_sec_ctx_match(pol->security, policy->security)) {
                        if (excl) {
                                write_unlock_bh(&xfrm_policy_lock);
                                return -EEXIST;
@@ -377,21 +382,49 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
                xfrm_pol_hold(policy);
        write_unlock_bh(&xfrm_policy_lock);
 
-       if (delpol) {
+       if (delpol)
                xfrm_policy_kill(delpol);
+
+       read_lock_bh(&xfrm_policy_lock);
+       gc_list = NULL;
+       for (policy = policy->next; policy; policy = policy->next) {
+               struct dst_entry *dst;
+
+               write_lock(&policy->lock);
+               dst = policy->bundles;
+               if (dst) {
+                       struct dst_entry *tail = dst;
+                       while (tail->next)
+                               tail = tail->next;
+                       tail->next = gc_list;
+                       gc_list = dst;
+
+                       policy->bundles = NULL;
+               }
+               write_unlock(&policy->lock);
+       }
+       read_unlock_bh(&xfrm_policy_lock);
+
+       while (gc_list) {
+               struct dst_entry *dst = gc_list;
+
+               gc_list = dst->next;
+               dst_free(dst);
        }
+
        return 0;
 }
 EXPORT_SYMBOL(xfrm_policy_insert);
 
-struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel,
-                                     int delete)
+struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel,
+                                         struct xfrm_sec_ctx *ctx, int delete)
 {
        struct xfrm_policy *pol, **p;
 
        write_lock_bh(&xfrm_policy_lock);
        for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
-               if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) {
+               if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) &&
+                   (xfrm_sec_ctx_match(ctx, pol->security))) {
                        xfrm_pol_hold(pol);
                        if (delete)
                                *p = pol->next;
@@ -406,14 +439,14 @@ struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel,
        }
        return pol;
 }
-EXPORT_SYMBOL(xfrm_policy_bysel);
+EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
 
 struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
 {
        struct xfrm_policy *pol, **p;
 
        write_lock_bh(&xfrm_policy_lock);
-       for (p = &xfrm_policy_list[id & 7]; (pol=*p)!=NULL; p = &pol->next) {
+       for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
                if (pol->index == id) {
                        xfrm_pol_hold(pol);
                        if (delete)
@@ -487,7 +520,7 @@ EXPORT_SYMBOL(xfrm_policy_walk);
 
 /* Find policy to apply to this flow. */
 
-static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
+static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir,
                               void **objp, atomic_t **obj_refp)
 {
        struct xfrm_policy *pol;
@@ -501,9 +534,12 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
                        continue;
 
                match = xfrm_selector_match(sel, fl, family);
+
                if (match) {
-                       xfrm_pol_hold(pol);
-                       break;
+                       if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) {
+                               xfrm_pol_hold(pol);
+                               break;
+                       }
                }
        }
        read_unlock_bh(&xfrm_policy_lock);
@@ -511,15 +547,37 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
                *obj_refp = &pol->refcnt;
 }
 
-static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
+static inline int policy_to_flow_dir(int dir)
+{
+       if (XFRM_POLICY_IN == FLOW_DIR_IN &&
+           XFRM_POLICY_OUT == FLOW_DIR_OUT &&
+           XFRM_POLICY_FWD == FLOW_DIR_FWD)
+               return dir;
+       switch (dir) {
+       default:
+       case XFRM_POLICY_IN:
+               return FLOW_DIR_IN;
+       case XFRM_POLICY_OUT:
+               return FLOW_DIR_OUT;
+       case XFRM_POLICY_FWD:
+               return FLOW_DIR_FWD;
+       };
+}
+
+static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid)
 {
        struct xfrm_policy *pol;
 
        read_lock_bh(&xfrm_policy_lock);
        if ((pol = sk->sk_policy[dir]) != NULL) {
-               int match = xfrm_selector_match(&pol->selector, fl,
+               int match = xfrm_selector_match(&pol->selector, fl,
                                                sk->sk_family);
+               int err = 0;
+
                if (match)
+                 err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir));
+
+               if (match && !err)
                        xfrm_pol_hold(pol);
                else
                        pol = NULL;
@@ -550,7 +608,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
        return NULL;
 }
 
-void xfrm_policy_delete(struct xfrm_policy *pol, int dir)
+int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
 {
        write_lock_bh(&xfrm_policy_lock);
        pol = __xfrm_policy_unlink(pol, dir);
@@ -559,8 +617,11 @@ void xfrm_policy_delete(struct xfrm_policy *pol, int dir)
                if (dir < XFRM_POLICY_MAX)
                        atomic_inc(&flow_cache_genid);
                xfrm_policy_kill(pol);
+               return 0;
        }
+       return -ENOENT;
 }
+EXPORT_SYMBOL(xfrm_policy_delete);
 
 int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
 {
@@ -590,6 +651,10 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
 
        if (newp) {
                newp->selector = old->selector;
+               if (security_xfrm_policy_clone(old, newp)) {
+                       kfree(newp);
+                       return NULL;  /* ENOMEM */
+               }
                newp->lft = old->lft;
                newp->curlft = old->curlft;
                newp->action = old->action;
@@ -701,22 +766,6 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
        return err;
 }
 
-static inline int policy_to_flow_dir(int dir)
-{
-       if (XFRM_POLICY_IN == FLOW_DIR_IN &&
-           XFRM_POLICY_OUT == FLOW_DIR_OUT &&
-           XFRM_POLICY_FWD == FLOW_DIR_FWD)
-               return dir;
-       switch (dir) {
-       default:
-       case XFRM_POLICY_IN:
-               return FLOW_DIR_IN;
-       case XFRM_POLICY_OUT:
-               return FLOW_DIR_OUT;
-       case XFRM_POLICY_FWD:
-               return FLOW_DIR_FWD;
-       };
-}
 
 static int stale_bundle(struct dst_entry *dst);
 
@@ -734,33 +783,35 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
        int nx = 0;
        int err;
        u32 genid;
-       u16 family = dst_orig->ops->family;
+       u16 family;
+       u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
+       u32 sk_sid = security_sk_sid(sk, fl, dir);
 restart:
        genid = atomic_read(&flow_cache_genid);
        policy = NULL;
        if (sk && sk->sk_policy[1])
-               policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
+               policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid);
 
        if (!policy) {
                /* To accelerate a bit...  */
                if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
                        return 0;
 
-               policy = flow_cache_lookup(fl, family,
-                                          policy_to_flow_dir(XFRM_POLICY_OUT),
-                                          xfrm_policy_lookup);
+               policy = flow_cache_lookup(fl, sk_sid, dst_orig->ops->family,
+                                          dir, xfrm_policy_lookup);
        }
 
        if (!policy)
                return 0;
 
+       family = dst_orig->ops->family;
        policy->curlft.use_time = (unsigned long)xtime.tv_sec;
 
        switch (policy->action) {
        case XFRM_POLICY_BLOCK:
                /* Prohibit the flow */
-               xfrm_pol_put(policy);
-               return -EPERM;
+               err = -EPERM;
+               goto error;
 
        case XFRM_POLICY_ALLOW:
                if (policy->xfrm_nr == 0) {
@@ -776,8 +827,8 @@ restart:
                 */
                dst = xfrm_find_bundle(fl, policy, family);
                if (IS_ERR(dst)) {
-                       xfrm_pol_put(policy);
-                       return PTR_ERR(dst);
+                       err = PTR_ERR(dst);
+                       goto error;
                }
 
                if (dst)
@@ -836,11 +887,11 @@ restart:
                         * We can't enlist stable bundles either.
                         */
                        write_unlock_bh(&policy->lock);
-
-                       xfrm_pol_put(policy);
                        if (dst)
                                dst_free(dst);
-                       goto restart;
+
+                       err = -EHOSTUNREACH;
+                       goto error;
                }
                dst->next = policy->bundles;
                policy->bundles = dst;
@@ -892,16 +943,16 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
        } else
                start = -1;
        for (; idx < sp->len; idx++) {
-               if (xfrm_state_ok(tmpl, sp->x[idx].xvec, family))
+               if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
                        return ++idx;
-               if (sp->x[idx].xvec->props.mode)
+               if (sp->xvec[idx]->props.mode)
                        break;
        }
        return start;
 }
 
-static int
-_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
+int
+xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
 {
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
 
@@ -912,11 +963,12 @@ _decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
        xfrm_policy_put_afinfo(afinfo);
        return 0;
 }
+EXPORT_SYMBOL(xfrm_decode_session);
 
 static inline int secpath_has_tunnel(struct sec_path *sp, int k)
 {
        for (; k < sp->len; k++) {
-               if (sp->x[k].xvec->props.mode)
+               if (sp->xvec[k]->props.mode)
                        return 1;
        }
 
@@ -928,35 +980,32 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 {
        struct xfrm_policy *pol;
        struct flowi fl;
+       u8 fl_dir = policy_to_flow_dir(dir);
+       u32 sk_sid;
 
-       if (_decode_session(skb, &fl, family) < 0)
+       if (xfrm_decode_session(skb, &fl, family) < 0)
                return 0;
+       nf_nat_decode_session(skb, &fl, family);
+
+       sk_sid = security_sk_sid(sk, &fl, fl_dir);
 
        /* First, check used SA against their selectors. */
        if (skb->sp) {
                int i;
 
                for (i=skb->sp->len-1; i>=0; i--) {
-                 struct sec_decap_state *xvec = &(skb->sp->x[i]);
-                       if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family))
-                               return 0;
-
-                       /* If there is a post_input processor, try running it */
-                       if (xvec->xvec->type->post_input &&
-                           (xvec->xvec->type->post_input)(xvec->xvec,
-                                                          &(xvec->decap),
-                                                          skb) != 0)
+                       struct xfrm_state *x = skb->sp->xvec[i];
+                       if (!xfrm_selector_match(&x->sel, &fl, family))
                                return 0;
                }
        }
 
        pol = NULL;
        if (sk && sk->sk_policy[dir])
-               pol = xfrm_sk_policy_lookup(sk, dir, &fl);
+               pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid);
 
        if (!pol)
-               pol = flow_cache_lookup(&fl, family,
-                                       policy_to_flow_dir(dir),
+               pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir,
                                        xfrm_policy_lookup);
 
        if (!pol)
@@ -1001,60 +1050,36 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 {
        struct flowi fl;
 
-       if (_decode_session(skb, &fl, family) < 0)
+       if (xfrm_decode_session(skb, &fl, family) < 0)
                return 0;
 
        return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
 }
 EXPORT_SYMBOL(__xfrm_route_forward);
 
-/* Optimize later using cookies and generation ids. */
-
 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
 {
-       if (!stale_bundle(dst))
-               return dst;
-
-       dst_release(dst);
+       /* If it is marked obsolete, which is how we even get here,
+        * then we have purged it from the policy bundle list and we
+        * did that for a good reason.
+        */
        return NULL;
 }
 
 static int stale_bundle(struct dst_entry *dst)
 {
-       struct dst_entry *child = dst;
-
-       while (child) {
-               if (child->obsolete > 0 ||
-                   (child->dev && !netif_running(child->dev)) ||
-                   (child->xfrm && child->xfrm->km.state != XFRM_STATE_VALID)) {
-                       return 1;
-               }
-               child = child->child;
-       }
-
-       return 0;
-}
-
-static void xfrm_dst_destroy(struct dst_entry *dst)
-{
-       if (!dst->xfrm)
-               return;
-       xfrm_state_put(dst->xfrm);
-       dst->xfrm = NULL;
+       return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC);
 }
 
-static void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
-                           int unregister)
+void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
 {
-       if (!unregister)
-               return;
-
        while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
                dst->dev = &loopback_dev;
                dev_hold(&loopback_dev);
                dev_put(dev);
        }
 }
+EXPORT_SYMBOL(xfrm_dst_ifdown);
 
 static void xfrm_link_failure(struct sk_buff *skb)
 {
@@ -1121,46 +1146,104 @@ int xfrm_flush_bundles(void)
        return 0;
 }
 
-/* Well... that's _TASK_. We need to scan through transformation
- * list and figure out what mss tcp should generate in order to
- * final datagram fit to mtu. Mama mia... :-)
- *
- * Apparently, some easy way exists, but we used to choose the most
- * bizarre ones. :-) So, raising Kalashnikov... tra-ta-ta.
- *
- * Consider this function as something like dark humour. :-)
+static int always_true(struct dst_entry *dst)
+{
+       return 1;
+}
+
+void xfrm_flush_all_bundles(void)
+{
+       xfrm_prune_bundles(always_true);
+}
+
+void xfrm_init_pmtu(struct dst_entry *dst)
+{
+       do {
+               struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+               u32 pmtu, route_mtu_cached;
+
+               pmtu = dst_mtu(dst->child);
+               xdst->child_mtu_cached = pmtu;
+
+               pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
+
+               route_mtu_cached = dst_mtu(xdst->route);
+               xdst->route_mtu_cached = route_mtu_cached;
+
+               if (pmtu > route_mtu_cached)
+                       pmtu = route_mtu_cached;
+
+               dst->metrics[RTAX_MTU-1] = pmtu;
+       } while ((dst = dst->next));
+}
+
+EXPORT_SYMBOL(xfrm_init_pmtu);
+
+/* Check that the bundle accepts the flow and its components are
+ * still valid.
  */
-static int xfrm_get_mss(struct dst_entry *dst, u32 mtu)
+
+int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
 {
-       int res = mtu - dst->header_len;
+       struct dst_entry *dst = &first->u.dst;
+       struct xfrm_dst *last;
+       u32 mtu;
+
+       if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
+           (dst->dev && !netif_running(dst->dev)))
+               return 0;
+
+       last = NULL;
 
+       do {
+               struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+
+               if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
+                       return 0;
+               if (dst->xfrm->km.state != XFRM_STATE_VALID)
+                       return 0;
+
+               mtu = dst_mtu(dst->child);
+               if (xdst->child_mtu_cached != mtu) {
+                       last = xdst;
+                       xdst->child_mtu_cached = mtu;
+               }
+
+               if (!dst_check(xdst->route, xdst->route_cookie))
+                       return 0;
+               mtu = dst_mtu(xdst->route);
+               if (xdst->route_mtu_cached != mtu) {
+                       last = xdst;
+                       xdst->route_mtu_cached = mtu;
+               }
+
+               dst = dst->child;
+       } while (dst->xfrm);
+
+       if (likely(!last))
+               return 1;
+
+       mtu = last->child_mtu_cached;
        for (;;) {
-               struct dst_entry *d = dst;
-               int m = res;
-
-               do {
-                       struct xfrm_state *x = d->xfrm;
-                       if (x) {
-                               spin_lock_bh(&x->lock);
-                               if (x->km.state == XFRM_STATE_VALID &&
-                                   x->type && x->type->get_max_size)
-                                       m = x->type->get_max_size(d->xfrm, m);
-                               else
-                                       m += x->props.header_len;
-                               spin_unlock_bh(&x->lock);
-                       }
-               } while ((d = d->child) != NULL);
+               dst = &last->u.dst;
 
-               if (m <= mtu)
+               mtu = xfrm_state_mtu(dst->xfrm, mtu);
+               if (mtu > last->route_mtu_cached)
+                       mtu = last->route_mtu_cached;
+               dst->metrics[RTAX_MTU-1] = mtu;
+
+               if (last == first)
                        break;
-               res -= (m - mtu);
-               if (res < 88)
-                       return mtu;
+
+               last = last->u.next;
+               last->child_mtu_cached = mtu;
        }
 
-       return res + dst->header_len;
+       return 1;
 }
 
+EXPORT_SYMBOL(xfrm_bundle_ok);
+
 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 {
        int err = 0;
@@ -1168,7 +1251,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
                return -EINVAL;
        if (unlikely(afinfo->family >= NPROTO))
                return -EAFNOSUPPORT;
-       write_lock(&xfrm_policy_afinfo_lock);
+       write_lock_bh(&xfrm_policy_afinfo_lock);
        if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
                err = -ENOBUFS;
        else {
@@ -1177,21 +1260,15 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
                        dst_ops->kmem_cachep = xfrm_dst_cache;
                if (likely(dst_ops->check == NULL))
                        dst_ops->check = xfrm_dst_check;
-               if (likely(dst_ops->destroy == NULL))
-                       dst_ops->destroy = xfrm_dst_destroy;
-               if (likely(dst_ops->ifdown == NULL))
-                       dst_ops->ifdown = xfrm_dst_ifdown;
                if (likely(dst_ops->negative_advice == NULL))
                        dst_ops->negative_advice = xfrm_negative_advice;
                if (likely(dst_ops->link_failure == NULL))
                        dst_ops->link_failure = xfrm_link_failure;
-               if (likely(dst_ops->get_mss == NULL))
-                       dst_ops->get_mss = xfrm_get_mss;
                if (likely(afinfo->garbage_collect == NULL))
                        afinfo->garbage_collect = __xfrm_garbage_collect;
                xfrm_policy_afinfo[afinfo->family] = afinfo;
        }
-       write_unlock(&xfrm_policy_afinfo_lock);
+       write_unlock_bh(&xfrm_policy_afinfo_lock);
        return err;
 }
 EXPORT_SYMBOL(xfrm_policy_register_afinfo);
@@ -1203,7 +1280,7 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
                return -EINVAL;
        if (unlikely(afinfo->family >= NPROTO))
                return -EAFNOSUPPORT;
-       write_lock(&xfrm_policy_afinfo_lock);
+       write_lock_bh(&xfrm_policy_afinfo_lock);
        if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
                if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
                        err = -EINVAL;
@@ -1212,15 +1289,12 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
                        xfrm_policy_afinfo[afinfo->family] = NULL;
                        dst_ops->kmem_cachep = NULL;
                        dst_ops->check = NULL;
-                       dst_ops->destroy = NULL;
-                       dst_ops->ifdown = NULL;
                        dst_ops->negative_advice = NULL;
                        dst_ops->link_failure = NULL;
-                       dst_ops->get_mss = NULL;
                        afinfo->garbage_collect = NULL;
                }
        }
-       write_unlock(&xfrm_policy_afinfo_lock);
+       write_unlock_bh(&xfrm_policy_afinfo_lock);
        return err;
 }
 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);