Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
[linux-2.6.git] / net / ipv6 / route.c
index 2017c69..8a77793 100644 (file)
@@ -24,6 +24,7 @@
  *             reachable.  otherwise, round-robin the list.
  */
 
+#include <linux/capability.h>
 #include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #define RT6_TRACE(x...) do { ; } while (0)
 #endif
 
+#define CLONE_OFFLINK_ROUTE 0
+
+#define RT6_SELECT_F_IFACE     0x1
+#define RT6_SELECT_F_REACHABLE 0x2
 
 static int ip6_rt_max_size = 4096;
 static int ip6_rt_gc_min_interval = HZ / 2;
@@ -84,14 +89,23 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
 static struct dst_entry        *ip6_dst_check(struct dst_entry *dst, u32 cookie);
 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
 static void            ip6_dst_destroy(struct dst_entry *);
-static void            ip6_dst_ifdown(struct dst_entry *, int how);
+static void            ip6_dst_ifdown(struct dst_entry *,
+                                      struct net_device *dev, int how);
 static int              ip6_dst_gc(void);
 
 static int             ip6_pkt_discard(struct sk_buff *skb);
-static int             ip6_pkt_discard_out(struct sk_buff **pskb);
+static int             ip6_pkt_discard_out(struct sk_buff *skb);
 static void            ip6_link_failure(struct sk_buff *skb);
 static void            ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
 
+#ifdef CONFIG_IPV6_ROUTE_INFO
+static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
+                                          struct in6_addr *gwaddr, int ifindex,
+                                          unsigned pref);
+static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
+                                          struct in6_addr *gwaddr, int ifindex);
+#endif
+
 static struct dst_ops ip6_dst_ops = {
        .family                 =       AF_INET6,
        .protocol               =       __constant_htons(ETH_P_IPV6),
@@ -133,7 +147,7 @@ struct fib6_node ip6_routing_table = {
 
 /* Protects all the ip6 fib */
 
-rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
+DEFINE_RWLOCK(rt6_lock);
 
 
 /* allocate dst with ip6_dst_ops */
@@ -153,9 +167,25 @@ static void ip6_dst_destroy(struct dst_entry *dst)
        }       
 }
 
-static void ip6_dst_ifdown(struct dst_entry *dst, int how)
+/* Re-point the route's inet6_dev from dev to loopback_dev (not for loopback). */
+static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+                          int how)
+{
+       struct rt6_info *rt = (struct rt6_info *)dst;
+       struct inet6_dev *old_idev = rt->rt6i_idev, *lo_idev;
+
+       if (dev == &loopback_dev || old_idev == NULL || old_idev->dev != dev)
+               return;
+       if ((lo_idev = in6_dev_get(&loopback_dev)) == NULL)
+               return;         /* nothing to swap to: keep the old idev */
+       rt->rt6i_idev = lo_idev;
+       in6_dev_put(old_idev);
+}
+
+static __inline__ int rt6_check_expired(const struct rt6_info *rt)
 {
-       ip6_dst_destroy(dst);
+       return (rt->rt6i_flags & RTF_EXPIRES &&         /* has an expiry time */
+               time_after(jiffies, rt->rt6i_expires)); /* and it has passed */
 }
 
 /*
@@ -174,8 +204,17 @@ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
                        struct net_device *dev = sprt->rt6i_dev;
                        if (dev->ifindex == oif)
                                return sprt;
-                       if (dev->flags&IFF_LOOPBACK)
+                       if (dev->flags & IFF_LOOPBACK) {
+                               if (sprt->rt6i_idev == NULL ||
+                                   sprt->rt6i_idev->dev->ifindex != oif) {
+                                       if (strict && oif)
+                                               continue;
+                                       if (local && (!oif || 
+                                                     local->rt6i_idev->dev->ifindex == oif))
+                                               continue;
+                               }
                                local = sprt;
+                       }
                }
 
                if (local)
@@ -187,134 +226,220 @@ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
        return rt;
 }
 
+#ifdef CONFIG_IPV6_ROUTER_PREF
+static void rt6_probe(struct rt6_info *rt)
+{
+       struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
+       /*
+        * Router Reachability Probing.  It is not clear yet
+        * that probing from here is appropriate; we still
+        * need to check whether it really is.
+        *
+        * Router Reachability Probe MUST be rate-limited
+        * to no more than one per minute.
+        */
+       if (!neigh || (neigh->nud_state & NUD_VALID))
+               return;         /* no nexthop, or it is already NUD_VALID */
+       read_lock_bh(&neigh->lock);     /* re-test the state under the lock */
+       if (!(neigh->nud_state & NUD_VALID) &&
+           time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
+               struct in6_addr mcaddr;
+               struct in6_addr *target;
+
+               neigh->updated = jiffies;       /* NOTE(review): written under the read lock only */
+               read_unlock_bh(&neigh->lock);   /* unlock before sending the solicitation */
+
+               target = (struct in6_addr *)&neigh->primary_key;
+               addrconf_addr_solict_mult(target, &mcaddr);
+               ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
+       } else
+               read_unlock_bh(&neigh->lock);
+}
+#else
+static inline void rt6_probe(struct rt6_info *rt)
+{
+       return;         /* CONFIG_IPV6_ROUTER_PREF is off: probing is a no-op */
+}
+#endif
+
 /*
- *     pointer to the last default router chosen. BH is disabled locally.
+ * Default Router Selection (RFC 2461 6.3.6)
  */
-static struct rt6_info *rt6_dflt_pointer;
-static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
+static inline int rt6_check_dev(struct rt6_info *rt, int oif)
+{
+       struct net_device *dev = rt->rt6i_dev;
+       if (!oif || dev->ifindex == oif)
+               return 2;       /* no oif requested, or exact device match */
+       if ((dev->flags & IFF_LOOPBACK) &&
+           rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
+               return 1;       /* loopback route whose idev is on oif */
+       return 0;               /* device does not match oif */
+}
 
-/* Default Router Selection (RFC 2461 6.3.6) */
-static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
+static inline int rt6_check_neigh(struct rt6_info *rt)
 {
-       struct rt6_info *match = NULL;
-       struct rt6_info *sprt;
-       int mpri = 0;
+       struct neighbour *neigh = rt->rt6i_nexthop;
+       int m = 0;              /* stays 0: gateway route, no NUD_VALID neighbour */
+       if (rt->rt6i_flags & RTF_NONEXTHOP ||
+           !(rt->rt6i_flags & RTF_GATEWAY))
+               m = 1;          /* RTF_NONEXTHOP set, or not a gateway route */
+       else if (neigh) {
+               read_lock_bh(&neigh->lock);
+               if (neigh->nud_state & NUD_VALID)
+                       m = 2;  /* gateway's neighbour entry is NUD_VALID */
+               read_unlock_bh(&neigh->lock);
+       }
+       return m;
+}
 
-       for (sprt = rt; sprt; sprt = sprt->u.next) {
-               struct neighbour *neigh;
-               int m = 0;
+/* Score rt for rt6_select(): higher is better, -1 means rt is excluded. */
+static int rt6_score_route(struct rt6_info *rt, int oif, int strict)
+{
+       int score = rt6_check_dev(rt, oif);
+       int nstate;
+
+       if (score == 0 && (strict & RT6_SELECT_F_IFACE))
+               return -1;      /* caller insists on an interface match */
+#ifdef CONFIG_IPV6_ROUTER_PREF
+       score |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
+#endif
+       nstate = rt6_check_neigh(rt);
+       if (nstate > 1)
+               return score | 16;      /* nexthop neighbour is NUD_VALID */
+       if (nstate == 0 && (strict & RT6_SELECT_F_REACHABLE))
+               return -1;      /* reachability required but not given */
+       return score;
+}
 
-               if (!oif ||
-                   (sprt->rt6i_dev &&
-                    sprt->rt6i_dev->ifindex == oif))
-                       m += 8;
+/* Return the best-scoring unexpired route among *head's equal-metric run. */
+static struct rt6_info *rt6_select(struct rt6_info **head, int oif, int strict)
+{
+       struct rt6_info *match = NULL, *last = NULL;
+       struct rt6_info *rt, *rt0 = *head;
+       u32 metric;
+       int mpri = -1;
 
-               if (sprt == rt6_dflt_pointer)
-                       m += 4;
+       RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
+                 __FUNCTION__, head, head ? *head : NULL, oif);
 
-               if ((neigh = sprt->rt6i_nexthop) != NULL) {
-                       read_lock_bh(&neigh->lock);
-                       switch (neigh->nud_state) {
-                       case NUD_REACHABLE:
-                               m += 3;
-                               break;
+       for (rt = rt0, metric = rt0->rt6i_metric;
+            rt && rt->rt6i_metric == metric && (!last || rt != rt0);
+            rt = rt->u.next) {
+               int m;
 
-                       case NUD_STALE:
-                       case NUD_DELAY:
-                       case NUD_PROBE:
-                               m += 2;
-                               break;
+               if (rt6_check_expired(rt))
+                       continue;       /* skip expired routes */
 
-                       case NUD_NOARP:
-                       case NUD_PERMANENT:
-                               m += 1;
-                               break;
+               last = rt;      /* last unexpired entry seen in this run */
 
-                       case NUD_INCOMPLETE:
-                       default:
-                               read_unlock_bh(&neigh->lock);
-                               continue;
-                       }
-                       read_unlock_bh(&neigh->lock);
-               } else {
+               m = rt6_score_route(rt, oif, strict);
+               if (m < 0)
                         continue;
-               }
 
-               if (m > mpri || m >= 12) {
-                       match = sprt;
+               if (m > mpri) {
+                       rt6_probe(match);       /* probe the displaced candidate (NULL is ignored) */
+                       match = rt;
                         mpri = m;
-                       if (m >= 12) {
-                               /* we choose the last default router if it
-                                * is in (probably) reachable state.
-                                * If route changed, we should do pmtu
-                                * discovery. --yoshfuji
-                                */
-                               break;
-                       }
+               } else {
+                       rt6_probe(rt);          /* probe the route that lost */
                 }
         }
 
-       spin_lock(&rt6_dflt_lock);
-       if (!match) {
-               /*
-                *      No default routers are known to be reachable.
-                *      SHOULD round robin
-                */
-               if (rt6_dflt_pointer) {
-                       for (sprt = rt6_dflt_pointer->u.next;
-                            sprt; sprt = sprt->u.next) {
-                               if (sprt->u.dst.obsolete <= 0 &&
-                                   sprt->u.dst.error == 0) {
-                                       match = sprt;
-                                       break;
-                               }
-                       }
-                       for (sprt = rt;
-                            !match && sprt;
-                            sprt = sprt->u.next) {
-                               if (sprt->u.dst.obsolete <= 0 &&
-                                   sprt->u.dst.error == 0) {
-                                       match = sprt;
-                                       break;
-                               }
-                               if (sprt == rt6_dflt_pointer)
-                                       break;
-                       }
-               }
+       if (!match &&
+           (strict & RT6_SELECT_F_REACHABLE) &&
+           last && last != rt0) {
+               /* no entries matched; do round-robin */
+               static DEFINE_SPINLOCK(lock);
+               spin_lock(&lock);
+               *head = rt0->u.next;            /* rotate: unlink rt0 from the front */
+               rt0->u.next = last->u.next;
+               last->u.next = rt0;             /* ...and relink it right after 'last' */
+               spin_unlock(&lock);
         }
 
-       if (match) {
-               if (rt6_dflt_pointer != match)
-                       RT6_TRACE("changed default router: %p->%p\n",
-                                 rt6_dflt_pointer, match);
-               rt6_dflt_pointer = match;
+       RT6_TRACE("%s() => %p, score=%d\n",
+                 __FUNCTION__, match, mpri);
+
+       return (match ? match : &ip6_null_entry);       /* never returns NULL */
+}
+
+#ifdef CONFIG_IPV6_ROUTE_INFO
+/* Apply a route_info option (opt, len bytes) seen on dev with gateway gwaddr:
+ * add, update or (lifetime 0) delete the route.  Returns 0 or -EINVAL. */
+int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
+                 struct in6_addr *gwaddr)
+{
+       struct route_info *rinfo = (struct route_info *) opt;
+       struct in6_addr prefix_buf, *prefix;
+       unsigned int pref;
+       u32 lifetime;
+       struct rt6_info *rt;
+
+       if (len < sizeof(struct route_info)) {
+               return -EINVAL;
         }
-       spin_unlock(&rt6_dflt_lock);
 
-       if (!match) {
-               /*
-                * Last Resort: if no default routers found, 
-                * use addrconf default route.
-                * We don't record this route.
-                */
-               for (sprt = ip6_routing_table.leaf;
-                    sprt; sprt = sprt->u.next) {
-                       if ((sprt->rt6i_flags & RTF_DEFAULT) &&
-                           (!oif ||
-                            (sprt->rt6i_dev &&
-                             sprt->rt6i_dev->ifindex == oif))) {
-                               match = sprt;
-                               break;
-                       }
+       /* Sanity check for prefix_len and length */
+       if (rinfo->length > 3 || rinfo->prefix_len > 128) {
+               return -EINVAL;
+       } else if (rinfo->prefix_len > 64) {
+               if (rinfo->length < 2) {
+                       return -EINVAL;
                 }
-               if (!match) {
-                       /* no default route.  give up. */
-                       match = &ip6_null_entry;
+       } else if (rinfo->prefix_len > 0) {
+               if (rinfo->length < 1) {
+                       return -EINVAL;
                 }
         }
 
-       return match;
+       pref = rinfo->route_pref;
+       if (pref == ICMPV6_ROUTER_PREF_INVALID)
+               pref = ICMPV6_ROUTER_PREF_MEDIUM;
+
+       lifetime = ntohl(rinfo->lifetime);      /* network -> host byte order */
+       if (lifetime == 0xffffffff) {
+               /* infinity */
+       } else if (lifetime > 0x7fffffff/HZ) {
+               /* Avoid arithmetic overflow */
+               lifetime = 0x7fffffff/HZ - 1;
+       }
+
+       if (rinfo->length == 3)
+               prefix = (struct in6_addr *)rinfo->prefix;
+       else {
+               /* this function is safe */
+               ipv6_addr_prefix(&prefix_buf,
+                                (struct in6_addr *)rinfo->prefix,
+                                rinfo->prefix_len);
+               prefix = &prefix_buf;
+       }
+
+       rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
+
+       if (rt && !lifetime) {
+               ip6_del_rt(rt, NULL, NULL, NULL);       /* zero lifetime: delete the route */
+               rt = NULL;
+       }
+
+       if (!rt && lifetime)
+               rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
+                                       pref);
+       else if (rt)
+               rt->rt6i_flags = RTF_ROUTEINFO |
+                                (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
+
+       if (rt) {
+               if (lifetime == 0xffffffff) {
+                       rt->rt6i_flags &= ~RTF_EXPIRES; /* infinite: never expires */
+               } else {
+                       rt->rt6i_expires = jiffies + HZ * lifetime;
+                       rt->rt6i_flags |= RTF_EXPIRES;
+               }
+               dst_release(&rt->u.dst);
+       }
+       return 0;
 }
+#endif
 
 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
                            int oif, int strict)
@@ -336,31 +461,27 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
        return NULL;
 }
 
-/* rt6_ins is called with FREE rt6_lock.
+/* ip6_ins_rt is called with FREE rt6_lock.
    It takes new route entry, the addition fails by any reason the
    route is freed. In any case, if caller does not hold it, it may
    be destroyed.
  */
 
-static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
+int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
+               void *_rtattr, struct netlink_skb_parms *req)
 {
        int err;
 
        write_lock_bh(&rt6_lock);
-       err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
+       err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);      /* req is passed through to fib6_add */
        write_unlock_bh(&rt6_lock);
 
        return err;
 }
 
-/* No rt6_lock! If COW failed, the function returns dead route entry
-   with dst->error set to errno value.
- */
-
-static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
-                               struct in6_addr *saddr)
+static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
+                                     struct in6_addr *saddr)
 {
-       int err;
        struct rt6_info *rt;
 
        /*
@@ -370,11 +491,14 @@ static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
        rt = ip6_rt_copy(ort);
 
        if (rt) {
-               ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
-
-               if (!(rt->rt6i_flags&RTF_GATEWAY))
+               if (!(rt->rt6i_flags&RTF_GATEWAY)) {
+                       if (rt->rt6i_dst.plen != 128 &&
+                           ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
+                               rt->rt6i_flags |= RTF_ANYCAST;
                        ipv6_addr_copy(&rt->rt6i_gateway, daddr);
+               }
 
+               ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
                rt->rt6i_dst.plen = 128;
                rt->rt6i_flags |= RTF_CACHE;
                rt->u.dst.flags |= DST_HOST;
@@ -388,25 +512,30 @@ static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
 
                rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
 
-               dst_hold(&rt->u.dst);
-
-               err = rt6_ins(rt, NULL, NULL);
-               if (err == 0)
-                       return rt;
+       }
 
-               rt->u.dst.error = err;
+       return rt;
+}
 
-               return rt;
+static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
+{
+       struct rt6_info *rt = ip6_rt_copy(ort);
+       if (rt) {
+               ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
+               rt->rt6i_dst.plen = 128;        /* host entry for daddr */
+               rt->rt6i_flags |= RTF_CACHE;
+               if (rt->rt6i_flags & RTF_REJECT)
+                       rt->u.dst.error = ort->u.dst.error;     /* keep the reject errno */
+               rt->u.dst.flags |= DST_HOST;
+               rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);      /* same nexthop as ort */
        }
-       dst_hold(&ip6_null_entry.u.dst);
-       return &ip6_null_entry;
+       return rt;      /* NULL when ip6_rt_copy() failed */
 }
 
 #define BACKTRACK() \
-if (rt == &ip6_null_entry && strict) { \
+if (rt == &ip6_null_entry) { \
        while ((fn = fn->parent) != NULL) { \
                if (fn->fn_flags & RTN_ROOT) { \
-                       dst_hold(&rt->u.dst); \
                        goto out; \
                } \
                if (fn->fn_flags & RTN_RTINFO) \
@@ -418,103 +547,138 @@ if (rt == &ip6_null_entry && strict) { \
 void ip6_route_input(struct sk_buff *skb)
 {
        struct fib6_node *fn;
-       struct rt6_info *rt;
+       struct rt6_info *rt, *nrt;
        int strict;
        int attempts = 3;
+       int err;
+       int reachable = RT6_SELECT_F_REACHABLE; /* first pass: reachable routes only */
 
-       strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
+       strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
 
 relookup:
        read_lock_bh(&rt6_lock);
 
+restart_2:     /* re-entered once from 'out' with reachable cleared */
        fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
                         &skb->nh.ipv6h->saddr);
 
 restart:
-       rt = fn->leaf;
-
-       if ((rt->rt6i_flags & RTF_CACHE)) {
-               rt = rt6_device_match(rt, skb->dev->ifindex, strict);
-               BACKTRACK();
-               dst_hold(&rt->u.dst);
+       rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
+       BACKTRACK();
+       if (rt == &ip6_null_entry ||
+           rt->rt6i_flags & RTF_CACHE)
                goto out;
-       }
 
-       rt = rt6_device_match(rt, skb->dev->ifindex, 0);
-       BACKTRACK();
+       dst_hold(&rt->u.dst);           /* keep rt alive once the lock is dropped */
+       read_unlock_bh(&rt6_lock);
 
-       if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
-               read_unlock_bh(&rt6_lock);
+       if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
+               nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
+       else {
+#if CLONE_OFFLINK_ROUTE
+               nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
+#else
+               goto out2;              /* cloning compiled out: use rt as it is */
+#endif
+       }
 
-               rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
-                            &skb->nh.ipv6h->saddr);
-                       
-               if (rt->u.dst.error != -EEXIST || --attempts <= 0)
+       dst_release(&rt->u.dst);
+       rt = nrt ? : &ip6_null_entry;   /* allocation failed: use the null entry */
+
+       dst_hold(&rt->u.dst);
+       if (nrt) {
+               err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
+               if (!err)
                        goto out2;
-               /* Race condition! In the gap, when rt6_lock was
-                  released someone could insert this route.  Relookup.
-               */
-               dst_release(&rt->u.dst);
-               goto relookup;
        }
-       dst_hold(&rt->u.dst);
+
+       if (--attempts <= 0)
+               goto out2;              /* out of retries: return what we have */
+
+       /*
+        * Race condition! In the gap, when rt6_lock was
+        * released someone could insert this route.  Relookup.
+        */
+       dst_release(&rt->u.dst);
+       goto relookup;
 
 out:
+       if (reachable) {
+               reachable = 0;          /* second pass drops the reachability requirement */
+               goto restart_2;
+       }
+       dst_hold(&rt->u.dst);
        read_unlock_bh(&rt6_lock);
 out2:
        rt->u.dst.lastuse = jiffies;
        rt->u.dst.__use++;
        skb->dst = (struct dst_entry *) rt;
+       return;
 }
 
 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
 {
        struct fib6_node *fn;
-       struct rt6_info *rt;
+       struct rt6_info *rt, *nrt;
        int strict;
        int attempts = 3;
+       int err;
+       int reachable = RT6_SELECT_F_REACHABLE;
 
-       strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
+       strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
 
 relookup:
        read_lock_bh(&rt6_lock);
 
+restart_2:
        fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
 
 restart:
-       rt = fn->leaf;
-
-       if ((rt->rt6i_flags & RTF_CACHE)) {
-               rt = rt6_device_match(rt, fl->oif, strict);
-               BACKTRACK();
-               dst_hold(&rt->u.dst);
+       rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
+       BACKTRACK();
+       if (rt == &ip6_null_entry ||
+           rt->rt6i_flags & RTF_CACHE)
                goto out;
-       }
-       if (rt->rt6i_flags & RTF_DEFAULT) {
-               if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
-                       rt = rt6_best_dflt(rt, fl->oif);
-       } else {
-               rt = rt6_device_match(rt, fl->oif, strict);
-               BACKTRACK();
-       }
 
-       if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
-               read_unlock_bh(&rt6_lock);
+       dst_hold(&rt->u.dst);
+       read_unlock_bh(&rt6_lock);
 
-               rt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
+       if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
+               nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
+       else {
+#if CLONE_OFFLINK_ROUTE
+               nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
+#else
+               goto out2;
+#endif
+       }
 
-               if (rt->u.dst.error != -EEXIST || --attempts <= 0)
-                       goto out2;
+       dst_release(&rt->u.dst);
+       rt = nrt ? : &ip6_null_entry;
 
-               /* Race condition! In the gap, when rt6_lock was
-                  released someone could insert this route.  Relookup.
-               */
-               dst_release(&rt->u.dst);
-               goto relookup;
-       }
        dst_hold(&rt->u.dst);
+       if (nrt) {
+               err = ip6_ins_rt(nrt, NULL, NULL, NULL);
+               if (!err)
+                       goto out2;
+       }
+
+       if (--attempts <= 0)
+               goto out2;
+
+       /*
+        * Race condition! In the gap, when rt6_lock was
+        * released someone could insert this route.  Relookup.
+        */
+       dst_release(&rt->u.dst);
+       goto relookup;
 
 out:
+       if (reachable) {
+               reachable = 0;
+               goto restart_2;
+       }
+       dst_hold(&rt->u.dst);
        read_unlock_bh(&rt6_lock);
 out2:
        rt->u.dst.lastuse = jiffies;
@@ -536,7 +700,6 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
        if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
                return dst;
 
-       dst_release(dst);
        return NULL;
 }
 
@@ -546,7 +709,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
 
        if (rt) {
                if (rt->rt6i_flags & RTF_CACHE)
-                       ip6_del_rt(rt, NULL, NULL);
+                       ip6_del_rt(rt, NULL, NULL, NULL);
                else
                        dst_release(dst);
        }
@@ -573,10 +736,12 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
        struct rt6_info *rt6 = (struct rt6_info*)dst;
 
-       if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
+       if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
                rt6->rt6i_flags |= RTF_MODIFIED;
-               if (mtu < IPV6_MIN_MTU)
+               if (mtu < IPV6_MIN_MTU) {
                        mtu = IPV6_MIN_MTU;
+                       dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+               }
                dst->metrics[RTAX_MTU-1] = mtu;
        }
 }
@@ -606,12 +771,19 @@ static inline unsigned int ipv6_advmss(unsigned int mtu)
 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
                                  struct neighbour *neigh,
                                  struct in6_addr *addr,
-                                 int (*output)(struct sk_buff **))
+                                 int (*output)(struct sk_buff *))
 {
-       struct rt6_info *rt = ip6_dst_alloc();
+       struct rt6_info *rt;
+       struct inet6_dev *idev = in6_dev_get(dev);
 
-       if (unlikely(rt == NULL))
+       if (unlikely(idev == NULL))
+               return NULL;
+
+       rt = ip6_dst_alloc();
+       if (unlikely(rt == NULL)) {
+               in6_dev_put(idev);
                goto out;
+       }
 
        dev_hold(dev);
        if (neigh)
@@ -620,12 +792,12 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
                neigh = ndisc_get_neigh(dev, addr);
 
        rt->rt6i_dev      = dev;
-       rt->rt6i_idev     = in6_dev_get(dev);
+       rt->rt6i_idev     = idev;
        rt->rt6i_nexthop  = neigh;
        atomic_set(&rt->u.dst.__refcnt, 1);
        rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
        rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
-       rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
+       rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
        rt->u.dst.output  = output;
 
 #if 0  /* there's no chance to use these for ndisc */
@@ -709,7 +881,7 @@ static int ipv6_get_mtu(struct net_device *dev)
        return mtu;
 }
 
-static int ipv6_get_hoplimit(struct net_device *dev)
+int ipv6_get_hoplimit(struct net_device *dev)
 {
        int hoplimit = ipv6_devconf.hop_limit;
        struct inet6_dev *idev;
@@ -726,13 +898,15 @@ static int ipv6_get_hoplimit(struct net_device *dev)
  *
  */
 
-int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
+int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
+               void *_rtattr, struct netlink_skb_parms *req)
 {
        int err;
        struct rtmsg *r;
        struct rtattr **rta;
-       struct rt6_info *rt;
+       struct rt6_info *rt = NULL;
        struct net_device *dev = NULL;
+       struct inet6_dev *idev = NULL;
        int addr_type;
 
        rta = (struct rtattr **) _rtattr;
@@ -744,9 +918,13 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
                return -EINVAL;
 #endif
        if (rtmsg->rtmsg_ifindex) {
+               err = -ENODEV;
                dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
                if (!dev)
-                       return -ENODEV;
+                       goto out;
+               idev = in6_dev_get(dev);
+               if (!idev)
+                       goto out;
        }
 
        if (rtmsg->rtmsg_metric == 0)
@@ -754,11 +932,13 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
 
        rt = ip6_dst_alloc();
 
-       if (rt == NULL)
-               return -ENOMEM;
+       if (rt == NULL) {
+               err = -ENOMEM;
+               goto out;
+       }
 
        rt->u.dst.obsolete = -1;
-       rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
+       rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
        if (nlh && (r = NLMSG_DATA(nlh))) {
                rt->rt6i_protocol = r->rtm_protocol;
        } else {
@@ -793,10 +973,20 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
         */
        if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
            (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
-               if (dev)
-                       dev_put(dev);
-               dev = &loopback_dev;
-               dev_hold(dev);
+               /* hold loopback dev/idev if we haven't done so. */
+               if (dev != &loopback_dev) {
+                       if (dev) {
+                               dev_put(dev);
+                               in6_dev_put(idev);
+                       }
+                       dev = &loopback_dev;
+                       dev_hold(dev);
+                       idev = in6_dev_get(dev);
+                       if (!idev) {
+                               err = -ENODEV;
+                               goto out;
+                       }
+               }
                rt->u.dst.output = ip6_pkt_discard_out;
                rt->u.dst.input = ip6_pkt_discard;
                rt->u.dst.error = -ENETUNREACH;
@@ -838,7 +1028,9 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
                                }
                        } else {
                                dev = grt->rt6i_dev;
+                               idev = grt->rt6i_idev;
                                dev_hold(dev);
+                               in6_dev_hold(grt->rt6i_idev);
                        }
                        if (!(grt->rt6i_flags&RTF_GATEWAY))
                                err = 0;
@@ -886,49 +1078,41 @@ install_route:
                }
        }
 
-       if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
-               if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
-                       rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
-                               IPV6_DEFAULT_MCASTHOPS;
-               else
-                       rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
-                               ipv6_get_hoplimit(dev);
-       }
-
+       if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
+               rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
        if (!rt->u.dst.metrics[RTAX_MTU-1])
                rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
        if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
-               rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
+               rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
        rt->u.dst.dev = dev;
-       rt->rt6i_idev = in6_dev_get(dev);
-       return rt6_ins(rt, nlh, _rtattr);
+       rt->rt6i_idev = idev;
+       return ip6_ins_rt(rt, nlh, _rtattr, req);
 
 out:
        if (dev)
                dev_put(dev);
-       dst_free((struct dst_entry *) rt);
+       if (idev)
+               in6_dev_put(idev);
+       if (rt)
+               dst_free((struct dst_entry *) rt);
        return err;
 }
 
-int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
+int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
 {
        int err;
 
        write_lock_bh(&rt6_lock);
 
-       spin_lock_bh(&rt6_dflt_lock);
-       rt6_dflt_pointer = NULL;
-       spin_unlock_bh(&rt6_dflt_lock);
-
+       err = fib6_del(rt, nlh, _rtattr, req);
        dst_release(&rt->u.dst);
 
-       err = fib6_del(rt, nlh, _rtattr);
        write_unlock_bh(&rt6_lock);
 
        return err;
 }
 
-static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
+static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
 {
        struct fib6_node *fn;
        struct rt6_info *rt;
@@ -947,7 +1131,7 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_r
                             rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
                                continue;
                        if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
-                           ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
+                           !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
                                continue;
                        if (rtmsg->rtmsg_metric &&
                            rtmsg->rtmsg_metric != rt->rt6i_metric)
@@ -955,7 +1139,7 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_r
                        dst_hold(&rt->u.dst);
                        read_unlock_bh(&rt6_lock);
 
-                       return ip6_del_rt(rt, nlh, _rtattr);
+                       return ip6_del_rt(rt, nlh, _rtattr, req);
                }
        }
        read_unlock_bh(&rt6_lock);
@@ -967,75 +1151,87 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_r
  *     Handle redirects
  */
 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
-                 struct neighbour *neigh, int on_link)
+                 struct neighbour *neigh, u8 *lladdr, int on_link)
 {
-       struct rt6_info *rt, *nrt;
-
-       /* Locate old route to this destination. */
-       rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
-
-       if (rt == NULL)
-               return;
-
-       if (neigh->dev != rt->rt6i_dev)
-               goto out;
-
-       /* Redirect received -> path was valid.
-          Look, redirects are sent only in response to data packets,
-          so that this nexthop apparently is reachable. --ANK
-        */
-       dst_confirm(&rt->u.dst);
-
-       /* Duplicate redirect: silently ignore. */
-       if (neigh == rt->u.dst.neighbour)
-               goto out;
-
-       /* Current route is on-link; redirect is always invalid.
-          
-          Seems, previous statement is not true. It could
-          be node, which looks for us as on-link (f.e. proxy ndisc)
-          But then router serving it might decide, that we should
-          know truth 8)8) --ANK (980726).
-        */
-       if (!(rt->rt6i_flags&RTF_GATEWAY))
-               goto out;
+       struct rt6_info *rt, *nrt = NULL;
+       int strict;
+       struct fib6_node *fn;
 
        /*
-        *      RFC 2461 specifies that redirects should only be
-        *      accepted if they come from the nexthop to the target.
-        *      Due to the way default routers are chosen, this notion
-        *      is a bit fuzzy and one might need to check all default
-        *      routers.
+        * Get the "current" route for this destination and
+        * check if the redirect has come from an appropriate router.
+        *
+        * RFC 2461 specifies that redirects should only be
+        * accepted if they come from the nexthop to the target.
+        * Due to the way the routes are chosen, this notion
+        * is a bit fuzzy and one might need to check all possible
+        * routes.
         */
+       strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);
 
-       if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
-               if (rt->rt6i_flags & RTF_DEFAULT) {
-                       struct rt6_info *rt1;
-
-                       read_lock(&rt6_lock);
-                       for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
-                               if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
-                                       dst_hold(&rt1->u.dst);
-                                       dst_release(&rt->u.dst);
-                                       read_unlock(&rt6_lock);
-                                       rt = rt1;
-                                       goto source_ok;
-                               }
-                       }
-                       read_unlock(&rt6_lock);
+       read_lock_bh(&rt6_lock);
+       fn = fib6_lookup(&ip6_routing_table, dest, NULL);
+restart:
+       for (rt = fn->leaf; rt; rt = rt->u.next) {
+               /*
+                * Current route is on-link; redirect is always invalid.
+                *
+                * Seems, previous statement is not true. It could
+                * be node, which looks for us as on-link (f.e. proxy ndisc)
+                * But then router serving it might decide, that we should
+                * know truth 8)8) --ANK (980726).
+                */
+               if (rt6_check_expired(rt))
+                       continue;
+               if (!(rt->rt6i_flags & RTF_GATEWAY))
+                       continue;
+               if (neigh->dev != rt->rt6i_dev)
+                       continue;
+               if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
+                       continue;
+               break;
+       }
+       if (rt)
+               dst_hold(&rt->u.dst);
+       else if (strict) {
+               while ((fn = fn->parent) != NULL) {
+                       if (fn->fn_flags & RTN_ROOT)
+                               break;
+                       if (fn->fn_flags & RTN_RTINFO)
+                               goto restart;
                }
+       }
+       read_unlock_bh(&rt6_lock);
+
+       if (!rt) {
                if (net_ratelimit())
                        printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
                               "for redirect target\n");
-               goto out;
+               return;
        }
 
-source_ok:
-
        /*
         *      We have finally decided to accept it.
         */
 
+       neigh_update(neigh, lladdr, NUD_STALE, 
+                    NEIGH_UPDATE_F_WEAK_OVERRIDE|
+                    NEIGH_UPDATE_F_OVERRIDE|
+                    (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
+                                    NEIGH_UPDATE_F_ISROUTER))
+                    );
+
+       /*
+        * Redirect received -> path was valid.
+        * Look, redirects are sent only in response to data packets,
+        * so that this nexthop apparently is reachable. --ANK
+        */
+       dst_confirm(&rt->u.dst);
+
+       /* Duplicate redirect: silently ignore. */
+       if (neigh == rt->u.dst.neighbour)
+               goto out;
+
        nrt = ip6_rt_copy(rt);
        if (nrt == NULL)
                goto out;
@@ -1052,13 +1248,13 @@ source_ok:
        nrt->rt6i_nexthop = neigh_clone(neigh);
        /* Reset pmtu, it may be better */
        nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
-       nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
+       nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
 
-       if (rt6_ins(nrt, NULL, NULL))
+       if (ip6_ins_rt(nrt, NULL, NULL, NULL))
                goto out;
 
        if (rt->rt6i_flags&RTF_CACHE) {
-               ip6_del_rt(rt, NULL, NULL);
+               ip6_del_rt(rt, NULL, NULL, NULL);
                return;
        }
 
@@ -1076,26 +1272,26 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
                        struct net_device *dev, u32 pmtu)
 {
        struct rt6_info *rt, *nrt;
-
-       if (pmtu < IPV6_MIN_MTU) {
-               if (net_ratelimit())
-                       printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
-                              pmtu);
-               /* According to RFC1981, the PMTU is set to the IPv6 minimum
-                  link MTU if the node receives a Packet Too Big message
-                  reporting next-hop MTU that is less than the IPv6 minimum MTU.
-                  */
-               pmtu = IPV6_MIN_MTU;
-       }
+       int allfrag = 0;
 
        rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
-
        if (rt == NULL)
                return;
 
-       if (pmtu >= dst_pmtu(&rt->u.dst))
+       if (pmtu >= dst_mtu(&rt->u.dst))
                goto out;
 
+       if (pmtu < IPV6_MIN_MTU) {
+               /*
+                * According to RFC 2460, when a node receives a Packet Too Big
+                * message reporting a PMTU below the IPv6 Minimum Link MTU
+                * (1280), the PMTU is set to that minimum and a fragment
+                * header must be included in every subsequent packet.
+                */
+               pmtu = IPV6_MIN_MTU;
+               allfrag = 1;
+       }
+
        /* New mtu received -> path was valid.
           They are sent only in response to data packets,
           so that this nexthop apparently is reachable. --ANK
@@ -1109,6 +1305,8 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
         */
        if (rt->rt6i_flags & RTF_CACHE) {
                rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
+               if (allfrag)
+                       rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
                dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
                rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
                goto out;
@@ -1119,34 +1317,27 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
           1. It is connected route. Action: COW
           2. It is gatewayed route or NONEXTHOP route. Action: clone it.
         */
-       if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
-               nrt = rt6_cow(rt, daddr, saddr);
-               if (!nrt->u.dst.error) {
-                       nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
-                       /* According to RFC 1981, detecting PMTU increase shouldn't be
-                          happened within 5 mins, the recommended timer is 10 mins.
-                          Here this route expiration time is set to ip6_rt_mtu_expires
-                          which is 10 mins. After 10 mins the decreased pmtu is expired
-                          and detecting PMTU increase will be automatically happened.
-                        */
-                       dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
-                       nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
-               }
-               dst_release(&nrt->u.dst);
-       } else {
-               nrt = ip6_rt_copy(rt);
-               if (nrt == NULL)
-                       goto out;
-               ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
-               nrt->rt6i_dst.plen = 128;
-               nrt->u.dst.flags |= DST_HOST;
-               nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
-               dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
-               nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
+       if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
+               nrt = rt6_alloc_cow(rt, daddr, saddr);
+       else
+               nrt = rt6_alloc_clone(rt, daddr);
+
+       if (nrt) {
                nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
-               rt6_ins(nrt, NULL, NULL);
-       }
+               if (allfrag)
+                       nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+
+               /* According to RFC 1981, detection of a PMTU increase must not
+                * happen within 5 mins; the recommended timer is 10 mins.
+                * Here the route expiration time is set to ip6_rt_mtu_expires,
+                * which is 10 mins. After 10 mins the decreased pmtu expires
+                * and detection of a PMTU increase happens automatically.
+                */
+               dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
+               nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
 
+               ip6_ins_rt(nrt, NULL, NULL, NULL);
+       }
 out:
        dst_release(&rt->u.dst);
 }
@@ -1185,6 +1376,57 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
        return rt;
 }
 
+#ifdef CONFIG_IPV6_ROUTE_INFO
+static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
+                                          struct in6_addr *gwaddr, int ifindex)
+{
+       struct fib6_node *fn;
+       struct rt6_info *rt = NULL;
+
+       write_lock_bh(&rt6_lock);
+       fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0);
+       if (!fn)
+               goto out;
+
+       for (rt = fn->leaf; rt; rt = rt->u.next) {
+               if (rt->rt6i_dev->ifindex != ifindex)
+                       continue;
+               if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
+                       continue;
+               if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
+                       continue;
+               dst_hold(&rt->u.dst);
+               break;
+       }
+out:
+       write_unlock_bh(&rt6_lock);
+       return rt;
+}
+
+static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
+                                          struct in6_addr *gwaddr, int ifindex,
+                                          unsigned pref)
+{
+       struct in6_rtmsg rtmsg;
+
+       memset(&rtmsg, 0, sizeof(rtmsg));
+       rtmsg.rtmsg_type = RTMSG_NEWROUTE;
+       ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
+       rtmsg.rtmsg_dst_len = prefixlen;
+       ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
+       rtmsg.rtmsg_metric = 1024;
+       rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
+       /* We should treat it as a default route if prefix length is 0. */
+       if (!prefixlen)
+               rtmsg.rtmsg_flags |= RTF_DEFAULT;
+       rtmsg.rtmsg_ifindex = ifindex;
+
+       ip6_route_add(&rtmsg, NULL, NULL, NULL);
+
+       return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
+}
+#endif
+
 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
 {      
        struct rt6_info *rt;
@@ -1195,7 +1437,8 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
        write_lock_bh(&rt6_lock);
        for (rt = fn->leaf; rt; rt=rt->u.next) {
                if (dev == rt->rt6i_dev &&
-                   ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
+                   ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
+                   ipv6_addr_equal(&rt->rt6i_gateway, addr))
                        break;
        }
        if (rt)
@@ -1205,7 +1448,8 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
 }
 
 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
-                                    struct net_device *dev)
+                                    struct net_device *dev,
+                                    unsigned int pref)
 {
        struct in6_rtmsg rtmsg;
 
@@ -1213,37 +1457,28 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
        rtmsg.rtmsg_type = RTMSG_NEWROUTE;
        ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
        rtmsg.rtmsg_metric = 1024;
-       rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
+       rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
+                           RTF_PREF(pref);
 
        rtmsg.rtmsg_ifindex = dev->ifindex;
 
-       ip6_route_add(&rtmsg, NULL, NULL);
+       ip6_route_add(&rtmsg, NULL, NULL, NULL);
        return rt6_get_dflt_router(gwaddr, dev);
 }
 
-void rt6_purge_dflt_routers(int last_resort)
+void rt6_purge_dflt_routers(void)
 {
        struct rt6_info *rt;
-       u32 flags;
-
-       if (last_resort)
-               flags = RTF_ALLONLINK;
-       else
-               flags = RTF_DEFAULT | RTF_ADDRCONF;     
 
 restart:
        read_lock_bh(&rt6_lock);
        for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
-               if (rt->rt6i_flags & flags) {
+               if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
                        dst_hold(&rt->u.dst);
 
-                       spin_lock_bh(&rt6_dflt_lock);
-                       rt6_dflt_pointer = NULL;
-                       spin_unlock_bh(&rt6_dflt_lock);
-
                        read_unlock_bh(&rt6_lock);
 
-                       ip6_del_rt(rt, NULL, NULL);
+                       ip6_del_rt(rt, NULL, NULL, NULL);
 
                        goto restart;
                }
@@ -1269,10 +1504,10 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
                rtnl_lock();
                switch (cmd) {
                case SIOCADDRT:
-                       err = ip6_route_add(&rtmsg, NULL, NULL);
+                       err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
                        break;
                case SIOCDELRT:
-                       err = ip6_route_del(&rtmsg, NULL, NULL);
+                       err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
                        break;
                default:
                        err = -EINVAL;
@@ -1289,7 +1524,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
  *     Drop the packet on the floor
  */
 
-int ip6_pkt_discard(struct sk_buff *skb)
+static int ip6_pkt_discard(struct sk_buff *skb)
 {
        IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
        icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
@@ -1297,68 +1532,55 @@ int ip6_pkt_discard(struct sk_buff *skb)
        return 0;
 }
 
-int ip6_pkt_discard_out(struct sk_buff **pskb)
+static int ip6_pkt_discard_out(struct sk_buff *skb)
 {
-       return ip6_pkt_discard(*pskb);
+       skb->dev = skb->dst->dev;
+       return ip6_pkt_discard(skb);
 }
 
 /*
- *     Add address
+ *     Allocate a dst for local (unicast / anycast) address.
  */
 
-int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast)
+struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
+                                   const struct in6_addr *addr,
+                                   int anycast)
 {
        struct rt6_info *rt = ip6_dst_alloc();
 
        if (rt == NULL)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
 
        dev_hold(&loopback_dev);
+       in6_dev_hold(idev);
 
        rt->u.dst.flags = DST_HOST;
        rt->u.dst.input = ip6_input;
        rt->u.dst.output = ip6_output;
        rt->rt6i_dev = &loopback_dev;
-       rt->rt6i_idev = in6_dev_get(&loopback_dev);
+       rt->rt6i_idev = idev;
        rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
-       rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
-       rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
+       rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
+       rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
        rt->u.dst.obsolete = -1;
 
        rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
-       if (!anycast)
+       if (anycast)
+               rt->rt6i_flags |= RTF_ANYCAST;
+       else
                rt->rt6i_flags |= RTF_LOCAL;
        rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
        if (rt->rt6i_nexthop == NULL) {
                dst_free((struct dst_entry *) rt);
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
        }
 
        ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
        rt->rt6i_dst.plen = 128;
-       rt6_ins(rt, NULL, NULL);
-
-       return 0;
-}
 
-/* Delete address. Warning: you should check that this address
-   disappeared before calling this function.
- */
-
-int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
-{
-       struct rt6_info *rt;
-       int err = -ENOENT;
-
-       rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
-       if (rt) {
-               if (rt->rt6i_dst.plen == 128)
-                       err = ip6_del_rt(rt, NULL, NULL);
-               else
-                       dst_release(&rt->u.dst);
-       }
+       atomic_set(&rt->u.dst.__refcnt, 1);
 
-       return err;
+       return rt;
 }
 
 static int fib6_ifdown(struct rt6_info *rt, void *arg)
@@ -1415,9 +1637,9 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
         */
        if (rt->rt6i_dev == arg->dev &&
            !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
-            (dst_pmtu(&rt->u.dst) > arg->mtu ||
-             (dst_pmtu(&rt->u.dst) < arg->mtu &&
-             dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
+            (dst_mtu(&rt->u.dst) > arg->mtu ||
+             (dst_mtu(&rt->u.dst) < arg->mtu &&
+             dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
                rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
        rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
        return 0;
@@ -1481,7 +1703,7 @@ int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
        if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
                return -EINVAL;
-       return ip6_route_del(&rtmsg, nlh, arg);
+       return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
 }
 
 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
@@ -1491,7 +1713,7 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
        if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
                return -EINVAL;
-       return ip6_route_add(&rtmsg, nlh, arg);
+       return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
 }
 
 struct rt6_rtnl_dump_arg
@@ -1501,11 +1723,9 @@ struct rt6_rtnl_dump_arg
 };
 
 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
-                        struct in6_addr *dst,
-                        struct in6_addr *src,
-                        int iif,
-                        int type, u32 pid, u32 seq,
-                        struct nlmsghdr *in_nlh, int prefix)
+                        struct in6_addr *dst, struct in6_addr *src,
+                        int iif, int type, u32 pid, u32 seq,
+                        int prefix, unsigned int flags)
 {
        struct rtmsg *rtm;
        struct nlmsghdr  *nlh;
@@ -1519,11 +1739,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
                }
        }
 
-       if (!pid && in_nlh) {
-               pid = in_nlh->nlmsg_pid;
-       }
-
-       nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
+       nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
        rtm = NLMSG_DATA(nlh);
        rtm->rtm_family = AF_INET6;
        rtm->rtm_dst_len = rt->rt6i_dst.plen;
@@ -1541,7 +1757,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
        rtm->rtm_protocol = rt->rt6i_protocol;
        if (rt->rt6i_flags&RTF_DYNAMIC)
                rtm->rtm_protocol = RTPROT_REDIRECT;
-       else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
+       else if (rt->rt6i_flags & RTF_ADDRCONF)
                rtm->rtm_protocol = RTPROT_KERNEL;
        else if (rt->rt6i_flags&RTF_DEFAULT)
                rtm->rtm_protocol = RTPROT_RA;
@@ -1609,7 +1825,7 @@ static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 
        return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
                     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
-                    NULL, prefix);
+                    prefix, NLM_F_MULTI);
 }
 
 static int fib6_dump_node(struct fib6_walker_t *w)
@@ -1639,16 +1855,14 @@ static void fib6_dump_end(struct netlink_callback *cb)
                fib6_walker_unlink(w);
                kfree(w);
        }
-       if (cb->args[1]) {
-               cb->done = (void*)cb->args[1];
-               cb->args[1] = 0;
-       }
+       cb->done = (void*)cb->args[1];
+       cb->args[1] = 0;
 }
 
 static int fib6_dump_done(struct netlink_callback *cb)
 {
        fib6_dump_end(cb);
-       return cb->done(cb);
+       return cb->done ? cb->done(cb) : 0;
 }
 
 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1672,11 +1886,10 @@ int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
                /*
                 * 2. allocate and initialize walker.
                 */
-               w = kmalloc(sizeof(*w), GFP_ATOMIC);
+               w = kzalloc(sizeof(*w), GFP_ATOMIC);
                if (w == NULL)
                        return -ENOMEM;
                RT6_TRACE("dump<%p", w);
-               memset(w, 0, sizeof(*w));
                w->root = &ip6_routing_table;
                w->func = fib6_dump_node;
                w->args = &arg;
@@ -1757,7 +1970,7 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
                            &fl.fl6_dst, &fl.fl6_src,
                            iif,
                            RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
-                           nlh->nlmsg_seq, nlh, 0);
+                           nlh->nlmsg_seq, 0, 0);
        if (err < 0) {
                err = -EMSGSIZE;
                goto out_free;
@@ -1773,23 +1986,31 @@ out_free:
        goto out;       
 }
 
-void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
+void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 
+                       struct netlink_skb_parms *req)
 {
        struct sk_buff *skb;
        int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
+       u32 pid = current->pid;
+       u32 seq = 0;
 
+       if (req)
+               pid = req->pid;
+       if (nlh)
+               seq = nlh->nlmsg_seq;
+       
        skb = alloc_skb(size, gfp_any());
        if (!skb) {
-               netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
+               netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
                return;
        }
-       if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
+       if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
                kfree_skb(skb);
-               netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
+               netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
                return;
        }
-       NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
-       netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
+       NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
+       netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
 }
 
 /*
@@ -1890,8 +2111,6 @@ static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
        return arg.len;
 }
 
-extern struct rt6_statistics rt6_stats;
-
 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
 {
        seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
@@ -1928,9 +2147,7 @@ int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
 {
        if (write) {
                proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-               if (flush_delay < 0)
-                       flush_delay = 0;
-               fib6_run_gc((unsigned long)flush_delay);
+               fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
                return 0;
        } else
                return -EINVAL;
@@ -1942,7 +2159,7 @@ ctl_table ipv6_route_table[] = {
                .procname       =       "flush",
                .data           =       &flush_delay,
                .maxlen         =       sizeof(int),
-               .mode           =       0644,
+               .mode           =       0200,
                .proc_handler   =       &ipv6_sysctl_rtcache_flush
        },
        {
@@ -2015,6 +2232,15 @@ ctl_table ipv6_route_table[] = {
                .proc_handler   =       &proc_dointvec_jiffies,
                .strategy       =       &sysctl_jiffies,
        },
+       {
+               .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
+               .procname       =       "gc_min_interval_ms",
+               .data           =       &ip6_rt_gc_min_interval,
+               .maxlen         =       sizeof(int),
+               .mode           =       0644,
+               .proc_handler   =       &proc_dointvec_ms_jiffies,
+               .strategy       =       &sysctl_ms_jiffies,
+       },
        { .ctl_name = 0 }
 };
 
@@ -2044,7 +2270,7 @@ void __init ip6_route_init(void)
 #endif
 }
 
-void __exit ip6_route_cleanup(void)
+void ip6_route_cleanup(void)
 {
 #ifdef CONFIG_PROC_FS
        proc_net_remove("ipv6_route");