2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
53 #include <linux/rtnetlink.h>
57 #include <asm/uaccess.h>
60 #include <linux/sysctl.h>
63 /* Set to 3 to get tracing. */
/*
 * Debug macros.  Two RT6_TRACE definitions appear below; presumably they
 * sit on opposite sides of an #if RT6_DEBUG-style conditional elided from
 * this excerpt -- TODO confirm against the full file.
 */
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
71 #define RT6_TRACE(x...) do { ; } while (0)
/*
 * Garbage-collection tunables for the IPv6 routing cache (jiffies-based
 * intervals).  ip6_rt_gc_interval is non-static, so it is referenced from
 * another translation unit.
 */
75 static int ip6_rt_max_size = 4096;
76 static int ip6_rt_gc_min_interval = HZ / 2;
77 static int ip6_rt_gc_timeout = 60*HZ;
78 int ip6_rt_gc_interval = 30*HZ;
79 static int ip6_rt_gc_elasticity = 9;
80 static int ip6_rt_mtu_expires = 10*60*HZ;
/* Minimum advertised MSS: minimum IPv6 MTU less IPv6 (40) and TCP (20) headers. */
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
/* Forward declarations for the dst_ops callbacks and packet-drop stubs
 * defined later in this file. */
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void ip6_dst_destroy(struct dst_entry *);
87 static void ip6_dst_ifdown(struct dst_entry *,
88 struct net_device *dev, int how);
89 static int ip6_dst_gc(void);
90 
91 static int ip6_pkt_discard(struct sk_buff *skb);
92 static int ip6_pkt_discard_out(struct sk_buff *skb);
93 static void ip6_link_failure(struct sk_buff *skb);
94 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
/*
 * dst_ops vtable wiring the generic destination-cache layer to the IPv6
 * routing implementations in this file.  Additional members (e.g. .gc)
 * appear to be elided from this excerpt.
 */
96 static struct dst_ops ip6_dst_ops = {
98 .protocol = __constant_htons(ETH_P_IPV6),
101 .check = ip6_dst_check,
102 .destroy = ip6_dst_destroy,
103 .ifdown = ip6_dst_ifdown,
104 .negative_advice = ip6_negative_advice,
105 .link_failure = ip6_link_failure,
106 .update_pmtu = ip6_rt_update_pmtu,
107 .entry_size = sizeof(struct rt6_info),
/*
 * The "null" route: a permanently-referenced reject entry returned when no
 * real route matches.  Both input and output paths discard the packet and
 * dst.error carries -ENETUNREACH back to the caller.
 */
110 struct rt6_info ip6_null_entry = {
113 .__refcnt = ATOMIC_INIT(1),
115 .dev = &loopback_dev,
117 .error = -ENETUNREACH,
118 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
119 .input = ip6_pkt_discard,
120 .output = ip6_pkt_discard_out,
/* self-referential path pointer: the null entry is its own path */
122 .path = (struct dst_entry*)&ip6_null_entry,
125 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
/* worst possible metric so any real route wins */
126 .rt6i_metric = ~(u32) 0,
127 .rt6i_ref = ATOMIC_INIT(1),
/* Root of the IPv6 FIB radix tree; its leaf starts out as the null route. */
130 struct fib6_node ip6_routing_table = {
131 .leaf = &ip6_null_entry,
132 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
135 /* Protects all the ip6 fib */
137 DEFINE_RWLOCK(rt6_lock);
140 /* allocate dst with ip6_dst_ops */
/* Thin wrapper: allocate a dst_entry from ip6_dst_ops and view it as rt6_info. */
141 static __inline__ struct rt6_info *ip6_dst_alloc(void)
143 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
/*
 * dst_ops->destroy callback: detach the route from its inet6_dev.
 * The matching in6_dev_put() presumably lives on a line elided from this
 * excerpt -- TODO confirm.
 */
146 static void ip6_dst_destroy(struct dst_entry *dst)
148 struct rt6_info *rt = (struct rt6_info *)dst;
149 struct inet6_dev *idev = rt->rt6i_idev;
152 rt->rt6i_idev = NULL;
/*
 * dst_ops->ifdown callback: when the route's device goes away, re-home the
 * route onto the loopback inet6_dev so the rt6i_idev pointer stays valid.
 * Reference drop/put handling appears to be on elided lines.
 */
157 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
160 struct rt6_info *rt = (struct rt6_info *)dst;
161 struct inet6_dev *idev = rt->rt6i_idev;
163 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
164 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
165 if (loopback_idev != NULL) {
166 rt->rt6i_idev = loopback_idev;
/* True when the route carries RTF_EXPIRES and its expiry time has passed. */
172 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
174 return (rt->rt6i_flags & RTF_EXPIRES &&
175 time_after(jiffies, rt->rt6i_expires));
179 * Route lookup. Any rt6_lock is implied.
/*
 * Walk the sibling list starting at @rt and pick the entry matching the
 * requested outgoing interface @oif; loopback entries are tracked as a
 * fallback in @local.  Returns &ip6_null_entry when nothing matches
 * (strict-mode fallbacks appear to be on elided lines -- TODO confirm).
 */
182 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
186 struct rt6_info *local = NULL;
187 struct rt6_info *sprt;
190 for (sprt = rt; sprt; sprt = sprt->u.next) {
191 struct net_device *dev = sprt->rt6i_dev;
192 if (dev->ifindex == oif)
194 if (dev->flags & IFF_LOOPBACK) {
195 if (sprt->rt6i_idev == NULL ||
196 sprt->rt6i_idev->dev->ifindex != oif) {
199 if (local && (!oif ||
200 local->rt6i_idev->dev->ifindex == oif))
211 return &ip6_null_entry;
217 * pointer to the last default router chosen. BH is disabled locally.
/* Round-robin state for default-router selection, guarded by its own lock. */
219 static struct rt6_info *rt6_dflt_pointer;
220 static DEFINE_SPINLOCK(rt6_dflt_lock);
/*
 * Forget the cached default-router choice.  Called with @rt == NULL to
 * unconditionally reset, or with a specific route to reset only if that
 * route is the one currently cached.
 */
222 void rt6_reset_dflt_pointer(struct rt6_info *rt)
224 spin_lock_bh(&rt6_dflt_lock);
225 if (rt == NULL || rt == rt6_dflt_pointer) {
226 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
227 rt6_dflt_pointer = NULL;
229 spin_unlock_bh(&rt6_dflt_lock);
232 /* Default Router Selection (RFC 2461 6.3.6) */
/*
 * Choose the best default router from the sibling list @rt for outgoing
 * interface @oif.  Preference is given to routers whose neighbour entry is
 * in a (probably) reachable NUD state; otherwise the routers are
 * round-robined starting after rt6_dflt_pointer.  Falls back to any
 * RTF_DEFAULT route in the table root, and finally to ip6_null_entry.
 * Scoring details (the 'm'/'mpri' computation) are partially elided from
 * this excerpt.
 */
233 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
235 struct rt6_info *match = NULL;
236 struct rt6_info *sprt;
/* Pass 1: score each candidate by neighbour reachability. */
239 for (sprt = rt; sprt; sprt = sprt->u.next) {
240 struct neighbour *neigh;
245 sprt->rt6i_dev->ifindex == oif))
248 if (rt6_check_expired(sprt))
251 if (sprt == rt6_dflt_pointer)
254 if ((neigh = sprt->rt6i_nexthop) != NULL) {
255 read_lock_bh(&neigh->lock);
256 switch (neigh->nud_state) {
274 read_unlock_bh(&neigh->lock);
277 read_unlock_bh(&neigh->lock);
/* keep the best score seen; >= 12 appears to mean "reachable enough" */
282 if (m > mpri || m >= 12) {
286 /* we choose the last default router if it
287 * is in (probably) reachable state.
288 * If route changed, we should do pmtu
289 * discovery. --yoshfuji
296 spin_lock(&rt6_dflt_lock);
299 * No default routers are known to be reachable.
/* Pass 2: round-robin from the last chosen router onwards. */
302 if (rt6_dflt_pointer) {
303 for (sprt = rt6_dflt_pointer->u.next;
304 sprt; sprt = sprt->u.next) {
305 if (sprt->u.dst.obsolete <= 0 &&
306 sprt->u.dst.error == 0 &&
307 !rt6_check_expired(sprt)) {
314 sprt = sprt->u.next) {
315 if (sprt->u.dst.obsolete <= 0 &&
316 sprt->u.dst.error == 0 &&
317 !rt6_check_expired(sprt)) {
321 if (sprt == rt6_dflt_pointer)
328 if (rt6_dflt_pointer != match)
329 RT6_TRACE("changed default router: %p->%p\n",
330 rt6_dflt_pointer, match);
331 rt6_dflt_pointer = match;
333 spin_unlock(&rt6_dflt_lock);
337 * Last Resort: if no default routers found,
338 * use addrconf default route.
339 * We don't record this route.
341 for (sprt = ip6_routing_table.leaf;
342 sprt; sprt = sprt->u.next) {
343 if (!rt6_check_expired(sprt) &&
344 (sprt->rt6i_flags & RTF_DEFAULT) &&
347 sprt->rt6i_dev->ifindex == oif))) {
353 /* no default route. give up. */
354 match = &ip6_null_entry;
/*
 * Public route lookup by destination/source address and interface.
 * Takes rt6_lock for reading, holds a reference on the returned route, and
 * stamps its last-use time.  Error-route handling around line 375 is
 * partially elided from this excerpt.
 */
361 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
364 struct fib6_node *fn;
367 read_lock_bh(&rt6_lock);
368 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
369 rt = rt6_device_match(fn->leaf, oif, strict);
370 dst_hold(&rt->u.dst);
372 read_unlock_bh(&rt6_lock);
374 rt->u.dst.lastuse = jiffies;
375 if (rt->u.dst.error == 0)
377 dst_release(&rt->u.dst);
381 /* ip6_ins_rt is called with FREE rt6_lock.
382 It takes new route entry, the addition fails by any reason the
383 route is freed. In any case, if caller does not hold it, it may
/* Insert @rt into the FIB under the write lock; returns fib6_add()'s result. */
387 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
391 write_lock_bh(&rt6_lock);
392 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
393 write_unlock_bh(&rt6_lock);
398 /* No rt6_lock! If COW failed, the function returns dead route entry
399 with dst->error set to errno value.
/*
 * Copy-on-write clone of @ort specialized to the single destination
 * @daddr (/128 host route, RTF_CACHE), then insert it into the FIB.
 * On allocation failure the held null entry is returned (its dst.error
 * conveys the failure).
 */
402 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
403 struct in6_addr *saddr)
412 rt = ip6_rt_copy(ort);
415 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
/* non-gatewayed (on-link) route: the destination itself is the nexthop */
417 if (!(rt->rt6i_flags&RTF_GATEWAY))
418 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
420 rt->rt6i_dst.plen = 128;
421 rt->rt6i_flags |= RTF_CACHE;
422 rt->u.dst.flags |= DST_HOST;
424 #ifdef CONFIG_IPV6_SUBTREES
425 if (rt->rt6i_src.plen && saddr) {
426 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
427 rt->rt6i_src.plen = 128;
431 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
433 dst_hold(&rt->u.dst);
435 err = ip6_ins_rt(rt, NULL, NULL);
439 rt->u.dst.error = err;
/* failure path: hand back the permanently-held null route */
443 dst_hold(&ip6_null_entry.u.dst);
444 return &ip6_null_entry;
/*
 * In strict mode, when the lookup landed on the null route, walk back up
 * the fib6 tree looking for an ancestor that carries route info; give up
 * (holding the null route) at the tree root.  Used by the two lookup
 * functions below; expansion relies on 'rt', 'fn' and 'strict' being in
 * the caller's scope.
 */
447 #define BACKTRACK() \
448 if (rt == &ip6_null_entry && strict) { \
449 while ((fn = fn->parent) != NULL) { \
450 if (fn->fn_flags & RTN_ROOT) { \
451 dst_hold(&rt->u.dst); \
454 if (fn->fn_flags & RTN_RTINFO) \
/*
 * Input-path route resolution: look up skb's destination, and if the
 * chosen route has no nexthop neighbour yet (and is not RTF_NONEXTHOP),
 * COW it into a cached host route via rt6_cow().  Insertion may race with
 * another CPU (-EEXIST), in which case the lookup is retried a bounded
 * number of times.  The resulting dst is attached to the skb.
 */
460 void ip6_route_input(struct sk_buff *skb)
462 struct fib6_node *fn;
/* strict matching for multicast / link-local destinations */
467 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
470 read_lock_bh(&rt6_lock);
472 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
473 &skb->nh.ipv6h->saddr);
478 if ((rt->rt6i_flags & RTF_CACHE)) {
479 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
481 dst_hold(&rt->u.dst);
485 rt = rt6_device_match(rt, skb->dev->ifindex, 0);
488 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
489 struct rt6_info *nrt;
490 dst_hold(&rt->u.dst);
491 read_unlock_bh(&rt6_lock);
493 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
494 &skb->nh.ipv6h->saddr);
496 dst_release(&rt->u.dst);
/* only retry on the insert race; anything else is final */
499 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
502 /* Race condition! In the gap, when rt6_lock was
503 released someone could insert this route. Relookup.
505 dst_release(&rt->u.dst);
508 dst_hold(&rt->u.dst);
511 read_unlock_bh(&rt6_lock);
513 rt->u.dst.lastuse = jiffies;
515 skb->dst = (struct dst_entry *) rt;
/*
 * Output-path route resolution for a flow.  Mirrors ip6_route_input() but
 * keys off the flowi, and additionally runs default-router selection
 * (rt6_best_dflt) for RTF_DEFAULT routes at addrconf priority.  Returns a
 * held dst entry.
 */
518 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
520 struct fib6_node *fn;
525 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
528 read_lock_bh(&rt6_lock);
530 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
535 if ((rt->rt6i_flags & RTF_CACHE)) {
536 rt = rt6_device_match(rt, fl->oif, strict);
538 dst_hold(&rt->u.dst);
541 if (rt->rt6i_flags & RTF_DEFAULT) {
542 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
543 rt = rt6_best_dflt(rt, fl->oif);
545 rt = rt6_device_match(rt, fl->oif, strict);
/* no nexthop neighbour yet: COW into a cached host route */
549 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
550 struct rt6_info *nrt;
551 dst_hold(&rt->u.dst);
552 read_unlock_bh(&rt6_lock);
554 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
556 dst_release(&rt->u.dst);
559 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
562 /* Race condition! In the gap, when rt6_lock was
563 released someone could insert this route. Relookup.
565 dst_release(&rt->u.dst);
568 dst_hold(&rt->u.dst);
571 read_unlock_bh(&rt6_lock);
573 rt->u.dst.lastuse = jiffies;
580 * Destination cache support functions
/*
 * dst_ops->check callback: a cached route is still valid iff its fib6
 * node's serial number matches the caller's cookie (tree unchanged).
 */
583 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
587 rt = (struct rt6_info *) dst;
589 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/* dst_ops->negative_advice: evict cache-clone routes the caller dislikes. */
596 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
598 struct rt6_info *rt = (struct rt6_info *) dst;
601 if (rt->rt6i_flags & RTF_CACHE)
602 ip6_del_rt(rt, NULL, NULL);
/*
 * dst_ops->link_failure: report unreachability to the sender, then either
 * expire the cached clone immediately or invalidate the default route's
 * fib node serial so cached lookups are rechecked.
 */
609 static void ip6_link_failure(struct sk_buff *skb)
613 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
615 rt = (struct rt6_info *) skb->dst;
617 if (rt->rt6i_flags&RTF_CACHE) {
618 dst_set_expires(&rt->u.dst, 0);
619 rt->rt6i_flags |= RTF_EXPIRES;
620 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
621 rt->rt6i_node->fn_sernum = -1;
/*
 * dst_ops->update_pmtu: lower the cached MTU on a /128 host route when a
 * smaller path MTU is learned; clamping to IPV6_MIN_MTU appears to happen
 * on an elided line after the check at 631.
 */
625 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
627 struct rt6_info *rt6 = (struct rt6_info*)dst;
629 if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
630 rt6->rt6i_flags |= RTF_MODIFIED;
631 if (mtu < IPV6_MIN_MTU)
633 dst->metrics[RTAX_MTU-1] = mtu;
637 /* Protected by rt6_lock. */
/* Singly-linked list of ndisc-allocated dst entries awaiting GC. */
638 static struct dst_entry *ndisc_dst_gc_list;
639 static int ipv6_get_mtu(struct net_device *dev);
/*
 * Derive the advertised MSS from a path MTU: subtract IPv6+TCP headers,
 * clamp below at ip6_rt_min_advmss and above at the non-jumbo maximum.
 */
641 static inline unsigned int ipv6_advmss(unsigned int mtu)
643 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
645 if (mtu < ip6_rt_min_advmss)
646 mtu = ip6_rt_min_advmss;
649 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
650 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
651 * IPV6_MAXPLEN is also valid and means: "any MSS,
652 * rely only on pmtu discovery"
654 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/*
 * Allocate a standalone dst entry for neighbour-discovery traffic on
 * @dev toward @addr, with @output as its transmit hook.  The entry is
 * chained onto ndisc_dst_gc_list (under rt6_lock) rather than inserted
 * into the FIB, and fib6 GC is kicked so it gets reaped once released.
 */
659 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
660 struct neighbour *neigh,
661 struct in6_addr *addr,
662 int (*output)(struct sk_buff *))
665 struct inet6_dev *idev = in6_dev_get(dev);
667 if (unlikely(idev == NULL))
670 rt = ip6_dst_alloc();
671 if (unlikely(rt == NULL)) {
/* caller may pass a pre-resolved neighbour; otherwise resolve here */
680 neigh = ndisc_get_neigh(dev, addr);
683 rt->rt6i_idev = idev;
684 rt->rt6i_nexthop = neigh;
685 atomic_set(&rt->u.dst.__refcnt, 1);
686 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
687 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
688 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
689 rt->u.dst.output = output;
691 #if 0 /* there's no chance to use these for ndisc */
692 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
695 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
696 rt->rt6i_dst.plen = 128;
699 write_lock_bh(&rt6_lock);
700 rt->u.dst.next = ndisc_dst_gc_list;
701 ndisc_dst_gc_list = &rt->u.dst;
702 write_unlock_bh(&rt6_lock);
704 fib6_force_start_gc();
707 return (struct dst_entry *)rt;
/*
 * Reap unreferenced entries from ndisc_dst_gc_list; entries still held
 * presumably set *more (unlink/free logic is elided from this excerpt).
 */
710 int ndisc_dst_gc(int *more)
712 struct dst_entry *dst, *next, **pprev;
716 pprev = &ndisc_dst_gc_list;
718 while ((dst = *pprev) != NULL) {
719 if (!atomic_read(&dst->__refcnt)) {
/*
 * dst_ops->gc callback.  Rate-limited by ip6_rt_gc_min_interval unless
 * the cache has outgrown ip6_rt_max_size.  'expire' adapts: reset toward
 * half the timeout when the cache is small, decayed geometrically by
 * ip6_rt_gc_elasticity otherwise.  Returns nonzero while still over the
 * size limit.
 */
732 static int ip6_dst_gc(void)
734 static unsigned expire = 30*HZ;
735 static unsigned long last_gc;
736 unsigned long now = jiffies;
738 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
739 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
745 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
746 expire = ip6_rt_gc_timeout>>1;
749 expire -= expire>>ip6_rt_gc_elasticity;
750 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
753 /* Clean host part of a prefix. Not necessary in radix tree,
754 but results in cleaner routing tables.
756 Remove it only when all the things will work!
/*
 * Return @dev's IPv6 MTU (cnf.mtu6) if it has an inet6_dev, else
 * IPV6_MIN_MTU.  The in6_dev_put() balancing line 764 appears elided.
 */
759 static int ipv6_get_mtu(struct net_device *dev)
761 int mtu = IPV6_MIN_MTU;
762 struct inet6_dev *idev;
764 idev = in6_dev_get(dev);
766 mtu = idev->cnf.mtu6;
/*
 * Return @dev's configured hop limit, falling back to the global
 * ipv6_devconf default when the device has no inet6_dev.
 */
772 static int ipv6_get_hoplimit(struct net_device *dev)
774 int hoplimit = ipv6_devconf.hop_limit;
775 struct inet6_dev *idev;
777 idev = in6_dev_get(dev);
779 hoplimit = idev->cnf.hop_limit;
/*
 * Create and insert a route described by @rtmsg (ioctl path) or by the
 * accompanying netlink message/attributes.  Validates prefix lengths,
 * resolves the device and gateway, fills in metrics with sensible
 * defaults, and finally inserts via ip6_ins_rt().  On error the
 * half-built rt is released with dst_free().
 */
789 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
794 struct rt6_info *rt = NULL;
795 struct net_device *dev = NULL;
796 struct inet6_dev *idev = NULL;
799 rta = (struct rtattr **) _rtattr;
/* prefix lengths are capped at 128 bits */
801 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
803 #ifndef CONFIG_IPV6_SUBTREES
804 if (rtmsg->rtmsg_src_len)
807 if (rtmsg->rtmsg_ifindex) {
809 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
812 idev = in6_dev_get(dev);
817 if (rtmsg->rtmsg_metric == 0)
818 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
820 rt = ip6_dst_alloc();
827 rt->u.dst.obsolete = -1;
828 rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
829 if (nlh && (r = NLMSG_DATA(nlh))) {
830 rt->rt6i_protocol = r->rtm_protocol;
832 rt->rt6i_protocol = RTPROT_BOOT;
835 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
837 if (addr_type & IPV6_ADDR_MULTICAST)
838 rt->u.dst.input = ip6_mc_input;
840 rt->u.dst.input = ip6_forward;
842 rt->u.dst.output = ip6_output;
844 ipv6_addr_prefix(&rt->rt6i_dst.addr,
845 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
846 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
847 if (rt->rt6i_dst.plen == 128)
848 rt->u.dst.flags = DST_HOST;
850 #ifdef CONFIG_IPV6_SUBTREES
851 ipv6_addr_prefix(&rt->rt6i_src.addr,
852 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
853 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
856 rt->rt6i_metric = rtmsg->rtmsg_metric;
858 /* We cannot add true routes via loopback here,
859 they would result in kernel looping; promote them to reject routes
861 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
862 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
863 /* hold loopback dev/idev if we haven't done so. */
864 if (dev != &loopback_dev) {
871 idev = in6_dev_get(dev);
877 rt->u.dst.output = ip6_pkt_discard_out;
878 rt->u.dst.input = ip6_pkt_discard;
879 rt->u.dst.error = -ENETUNREACH;
880 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
884 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
885 struct in6_addr *gw_addr;
888 gw_addr = &rtmsg->rtmsg_gateway;
889 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
890 gwa_type = ipv6_addr_type(gw_addr);
/* non-link-local gateway: must be reachable via an existing gatewayless route */
892 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
893 struct rt6_info *grt;
895 /* IPv6 strictly inhibits using not link-local
896 addresses as nexthop address.
897 Otherwise, router will not able to send redirects.
898 It is very good, but in some (rare!) circumstances
899 (SIT, PtP, NBMA NOARP links) it is handy to allow
900 some exceptions. --ANK
903 if (!(gwa_type&IPV6_ADDR_UNICAST))
906 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
912 if (dev != grt->rt6i_dev) {
913 dst_release(&grt->u.dst);
918 idev = grt->rt6i_idev;
920 in6_dev_hold(grt->rt6i_idev);
922 if (!(grt->rt6i_flags&RTF_GATEWAY))
924 dst_release(&grt->u.dst);
930 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
/* gateway / NONEXTHOP routes need a resolved neighbour up front */
938 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
939 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
940 if (IS_ERR(rt->rt6i_nexthop)) {
941 err = PTR_ERR(rt->rt6i_nexthop);
942 rt->rt6i_nexthop = NULL;
947 rt->rt6i_flags = rtmsg->rtmsg_flags;
/* copy user-supplied metrics from the RTA_METRICS nest */
950 if (rta && rta[RTA_METRICS-1]) {
951 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
952 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
954 while (RTA_OK(attr, attrlen)) {
955 unsigned flavor = attr->rta_type;
957 if (flavor > RTAX_MAX) {
961 rt->u.dst.metrics[flavor-1] =
962 *(u32 *)RTA_DATA(attr);
964 attr = RTA_NEXT(attr, attrlen);
/* default hoplimit / MTU / advmss if the user didn't set them */
968 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
969 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
970 rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
971 IPV6_DEFAULT_MCASTHOPS;
973 rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
974 ipv6_get_hoplimit(dev);
977 if (!rt->u.dst.metrics[RTAX_MTU-1])
978 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
979 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
980 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
982 rt->rt6i_idev = idev;
983 return ip6_ins_rt(rt, nlh, _rtattr);
/* error path: free the never-inserted entry */
991 dst_free((struct dst_entry *) rt);
/*
 * Remove @rt from the FIB under the write lock.  The cached default-
 * router pointer is reset first so it can never dangle, and the caller's
 * reference is dropped here.
 */
995 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
999 write_lock_bh(&rt6_lock);
1001 rt6_reset_dflt_pointer(NULL);
1003 err = fib6_del(rt, nlh, _rtattr);
1004 dst_release(&rt->u.dst);
1006 write_unlock_bh(&rt6_lock);
/*
 * Delete the route matching @rtmsg: locate the exact prefix node, then
 * scan its leaf chain for an entry matching the requested interface,
 * gateway and metric (each filter applies only if specified).  The match
 * is deleted via ip6_del_rt() after dropping the read lock.
 */
1011 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
1013 struct fib6_node *fn;
1014 struct rt6_info *rt;
1017 read_lock_bh(&rt6_lock);
1019 fn = fib6_locate(&ip6_routing_table,
1020 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1021 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1024 for (rt = fn->leaf; rt; rt = rt->u.next) {
1025 if (rtmsg->rtmsg_ifindex &&
1026 (rt->rt6i_dev == NULL ||
1027 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1029 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1030 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1032 if (rtmsg->rtmsg_metric &&
1033 rtmsg->rtmsg_metric != rt->rt6i_metric)
/* hold across the unlock; ip6_del_rt consumes this reference */
1035 dst_hold(&rt->u.dst);
1036 read_unlock_bh(&rt6_lock);
1038 return ip6_del_rt(rt, nlh, _rtattr);
1041 read_unlock_bh(&rt6_lock);
/*
 * Handle an ICMPv6 redirect for @dest received from @saddr via @neigh.
 * Validates that the redirect came from the current nexthop (checking all
 * default routers if needed), updates the neighbour cache, then installs
 * a cloned RTF_CACHE host route through the new gateway, replacing any
 * existing cache entry for the destination.
 */
1049 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1050 struct neighbour *neigh, u8 *lladdr, int on_link)
1052 struct rt6_info *rt, *nrt;
1054 /* Locate old route to this destination. */
1055 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1060 if (neigh->dev != rt->rt6i_dev)
1064 * Current route is on-link; redirect is always invalid.
1066 * Seems, previous statement is not true. It could
1067 * be node, which looks for us as on-link (f.e. proxy ndisc)
1068 * But then router serving it might decide, that we should
1069 * know truth 8)8) --ANK (980726).
1071 if (!(rt->rt6i_flags&RTF_GATEWAY))
1075 * RFC 2461 specifies that redirects should only be
1076 * accepted if they come from the nexthop to the target.
1077 * Due to the way default routers are chosen, this notion
1078 * is a bit fuzzy and one might need to check all default
1081 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1082 if (rt->rt6i_flags & RTF_DEFAULT) {
1083 struct rt6_info *rt1;
1085 read_lock(&rt6_lock);
1086 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1087 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
/* swap the held reference to the matching default router */
1088 dst_hold(&rt1->u.dst);
1089 dst_release(&rt->u.dst);
1090 read_unlock(&rt6_lock);
1095 read_unlock(&rt6_lock);
1097 if (net_ratelimit())
1098 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1099 "for redirect target\n");
1106 * We have finally decided to accept it.
1109 neigh_update(neigh, lladdr, NUD_STALE,
1110 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1111 NEIGH_UPDATE_F_OVERRIDE|
1112 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1113 NEIGH_UPDATE_F_ISROUTER))
1117 * Redirect received -> path was valid.
1118 * Look, redirects are sent only in response to data packets,
1119 * so that this nexthop apparently is reachable. --ANK
1121 dst_confirm(&rt->u.dst);
1123 /* Duplicate redirect: silently ignore. */
1124 if (neigh == rt->u.dst.neighbour)
1127 nrt = ip6_rt_copy(rt);
1131 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1133 nrt->rt6i_flags &= ~RTF_GATEWAY;
1135 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1136 nrt->rt6i_dst.plen = 128;
1137 nrt->u.dst.flags |= DST_HOST;
1139 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1140 nrt->rt6i_nexthop = neigh_clone(neigh);
1141 /* Reset pmtu, it may be better */
1142 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1143 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
1145 if (ip6_ins_rt(nrt, NULL, NULL))
/* old cache clone is superseded by the new one */
1148 if (rt->rt6i_flags&RTF_CACHE) {
1149 ip6_del_rt(rt, NULL, NULL);
1154 dst_release(&rt->u.dst);
1159 * Handle ICMP "packet too big" messages
1160 * i.e. Path MTU discovery
/*
 * Record a newly learned path MTU for @daddr.  Per RFC 1981, reported
 * MTUs below IPV6_MIN_MTU are clamped up to the minimum.  Cached host
 * routes are updated in place; otherwise a /128 clone is created (COW
 * for connected routes, plain copy for gatewayed/NONEXTHOP routes) and
 * set to expire after ip6_rt_mtu_expires so PMTU increases are re-probed.
 */
1163 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1164 struct net_device *dev, u32 pmtu)
1166 struct rt6_info *rt, *nrt;
1168 if (pmtu < IPV6_MIN_MTU) {
1169 if (net_ratelimit())
1170 printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1172 /* According to RFC1981, the PMTU is set to the IPv6 minimum
1173 link MTU if the node receives a Packet Too Big message
1174 reporting next-hop MTU that is less than the IPv6 minimum MTU.
1176 pmtu = IPV6_MIN_MTU;
1179 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
/* nothing to do unless the reported MTU is actually smaller */
1184 if (pmtu >= dst_pmtu(&rt->u.dst))
1187 /* New mtu received -> path was valid.
1188 They are sent only in response to data packets,
1189 so that this nexthop apparently is reachable. --ANK
1191 dst_confirm(&rt->u.dst);
1193 /* Host route. If it is static, it would be better
1194 not to override it, but add new one, so that
1195 when cache entry will expire old pmtu
1196 would return automatically.
1198 if (rt->rt6i_flags & RTF_CACHE) {
1199 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1200 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1201 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1206 Two cases are possible:
1207 1. It is connected route. Action: COW
1208 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1210 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1211 nrt = rt6_cow(rt, daddr, saddr);
1212 if (!nrt->u.dst.error) {
1213 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1214 /* According to RFC 1981, detecting PMTU increase shouldn't be
1215 happened within 5 mins, the recommended timer is 10 mins.
1216 Here this route expiration time is set to ip6_rt_mtu_expires
1217 which is 10 mins. After 10 mins the decreased pmtu is expired
1218 and detecting PMTU increase will be automatically happened.
1220 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1221 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1223 dst_release(&nrt->u.dst);
1225 nrt = ip6_rt_copy(rt);
1228 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1229 nrt->rt6i_dst.plen = 128;
1230 nrt->u.dst.flags |= DST_HOST;
1231 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1232 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1233 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1234 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1235 ip6_ins_rt(nrt, NULL, NULL);
1239 dst_release(&rt->u.dst);
1243 * Misc support functions
/*
 * Allocate a new rt6_info and copy @ort's routing state into it: I/O
 * hooks, metrics, device and inet6_dev (with references), gateway and
 * destination keys.  The copy never inherits RTF_EXPIRES and starts with
 * metric 0; it is not yet inserted into the FIB.
 */
1246 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1248 struct rt6_info *rt = ip6_dst_alloc();
1251 rt->u.dst.input = ort->u.dst.input;
1252 rt->u.dst.output = ort->u.dst.output;
1254 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1255 rt->u.dst.dev = ort->u.dst.dev;
1257 dev_hold(rt->u.dst.dev);
1258 rt->rt6i_idev = ort->rt6i_idev;
1260 in6_dev_hold(rt->rt6i_idev);
1261 rt->u.dst.lastuse = jiffies;
1262 rt->rt6i_expires = 0;
1264 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1265 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1266 rt->rt6i_metric = 0;
1268 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1269 #ifdef CONFIG_IPV6_SUBTREES
1270 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
/*
 * Find the default route through gateway @addr on @dev by scanning the
 * root node's leaf chain.  Returns the route with a reference held, or
 * presumably NULL when no entry matches (tail elided from this excerpt).
 */
1276 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1278 struct rt6_info *rt;
1279 struct fib6_node *fn;
1281 fn = &ip6_routing_table;
1283 write_lock_bh(&rt6_lock);
1284 for (rt = fn->leaf; rt; rt=rt->u.next) {
1285 if (dev == rt->rt6i_dev &&
1286 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1290 dst_hold(&rt->u.dst);
1291 write_unlock_bh(&rt6_lock);
/*
 * Install a router-advertisement default route (::/0) via @gwaddr on
 * @dev, then look it back up so the caller gets a referenced rt6_info.
 */
1295 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1296 struct net_device *dev)
1298 struct in6_rtmsg rtmsg;
1300 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1301 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1302 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1303 rtmsg.rtmsg_metric = 1024;
1304 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1306 rtmsg.rtmsg_ifindex = dev->ifindex;
/* the add may fail; rt6_get_dflt_router below reports the actual state */
1308 ip6_route_add(&rtmsg, NULL, NULL);
1309 return rt6_get_dflt_router(gwaddr, dev);
/*
 * Delete every RTF_DEFAULT/RTF_ADDRCONF route in the table.  The lock is
 * dropped around each ip6_del_rt() call, so the scan presumably restarts
 * from the head after each deletion (restart line elided from excerpt).
 */
1312 void rt6_purge_dflt_routers(void)
1314 struct rt6_info *rt;
1317 read_lock_bh(&rt6_lock);
1318 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1319 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1320 dst_hold(&rt->u.dst);
1322 rt6_reset_dflt_pointer(NULL);
1324 read_unlock_bh(&rt6_lock);
1326 ip6_del_rt(rt, NULL, NULL);
1331 read_unlock_bh(&rt6_lock);
/*
 * SIOCADDRT/SIOCDELRT ioctl entry point: requires CAP_NET_ADMIN, copies
 * the in6_rtmsg from userspace, and dispatches to ip6_route_add() or
 * ip6_route_del().
 */
1334 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1336 struct in6_rtmsg rtmsg;
1340 case SIOCADDRT: /* Add a route */
1341 case SIOCDELRT: /* Delete a route */
1342 if (!capable(CAP_NET_ADMIN))
1344 err = copy_from_user(&rtmsg, arg,
1345 sizeof(struct in6_rtmsg));
1352 err = ip6_route_add(&rtmsg, NULL, NULL);
1355 err = ip6_route_del(&rtmsg, NULL, NULL);
1369 * Drop the packet on the floor
/* Reject-route input handler: count, send "no route" ICMP, drop. */
1372 int ip6_pkt_discard(struct sk_buff *skb)
1374 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1375 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
/* Output-side variant: fix up skb->dev from the dst, then discard. */
1380 int ip6_pkt_discard_out(struct sk_buff *skb)
1382 skb->dev = skb->dst->dev;
1383 return ip6_pkt_discard(skb);
1387 * Allocate a dst for local (unicast / anycast) address.
/*
 * Build a host route for a locally-configured address: delivered via
 * loopback, RTF_UP|RTF_NONEXTHOP (plus RTF_LOCAL, apparently gated by a
 * condition elided at line 1414).  Returns ERR_PTR(-ENOMEM) on
 * allocation or neighbour-lookup failure.
 */
1390 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1391 const struct in6_addr *addr,
1394 struct rt6_info *rt = ip6_dst_alloc();
1397 return ERR_PTR(-ENOMEM);
1399 dev_hold(&loopback_dev);
1402 rt->u.dst.flags = DST_HOST;
1403 rt->u.dst.input = ip6_input;
1404 rt->u.dst.output = ip6_output;
1405 rt->rt6i_dev = &loopback_dev;
1406 rt->rt6i_idev = idev;
1407 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1408 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
1409 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
1410 rt->u.dst.obsolete = -1;
1412 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1414 rt->rt6i_flags |= RTF_LOCAL;
1415 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1416 if (rt->rt6i_nexthop == NULL) {
1417 dst_free((struct dst_entry *) rt);
1418 return ERR_PTR(-ENOMEM);
1421 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1422 rt->rt6i_dst.plen = 128;
1424 atomic_set(&rt->u.dst.__refcnt, 1);
/*
 * fib6_clean_tree callback: select for deletion every route on the given
 * device (or all routes when arg == NULL), never the null entry.
 */
1429 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1431 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1432 rt != &ip6_null_entry) {
1433 RT6_TRACE("deleted by ifdown %p\n", rt);
/* Purge all routes referencing @dev; called when an interface goes down. */
1439 void rt6_ifdown(struct net_device *dev)
1441 write_lock_bh(&rt6_lock);
1442 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1443 write_unlock_bh(&rt6_lock);
/* Argument bundle for the rt6_mtu_change_route tree walk (dev + new mtu). */
1446 struct rt6_mtu_change_arg
1448 struct net_device *dev;
/*
 * fib6_clean_tree callback for device MTU changes: update each route's
 * RTAX_MTU (unless locked) when the new MTU shrinks the path, or when it
 * grows and the old route PMTU equalled the device MTU (i.e. this link
 * was the bottleneck).  ADVMSS is recomputed from the new MTU.
 */
1452 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1454 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1455 struct inet6_dev *idev;
1457 /* In IPv6 pmtu discovery is not optional,
1458 so that RTAX_MTU lock cannot disable it.
1459 We still use this lock to block changes
1460 caused by addrconf/ndisc.
1463 idev = __in6_dev_get(arg->dev);
1467 /* For administrative MTU increase, there is no way to discover
1468 IPv6 PMTU increase, so PMTU increase should be updated here.
1469 Since RFC 1981 doesn't include administrative MTU increase
1470 update PMTU increase is a MUST. (i.e. jumbo frame)
1473 If new MTU is less than route PMTU, this new MTU will be the
1474 lowest MTU in the path, update the route PMTU to reflect PMTU
1475 decreases; if new MTU is greater than route PMTU, and the
1476 old MTU is the lowest MTU in the path, update the route PMTU
1477 to reflect the increase. In this case if the other nodes' MTU
1478 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1481 if (rt->rt6i_dev == arg->dev &&
1482 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1483 (dst_pmtu(&rt->u.dst) > arg->mtu ||
1484 (dst_pmtu(&rt->u.dst) < arg->mtu &&
1485 dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
1486 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1487 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
/* Propagate a device MTU change across the whole routing tree. */
1491 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1493 struct rt6_mtu_change_arg arg;
1497 read_lock_bh(&rt6_lock);
1498 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1499 read_unlock_bh(&rt6_lock);
/*
 * Translate a netlink rtmsg + attribute vector into the legacy
 * in6_rtmsg used by ip6_route_add/del.  Each attribute is length-checked
 * before being copied; RTN_UNREACHABLE maps to RTF_REJECT and an
 * RTA_GATEWAY attribute implies RTF_GATEWAY.
 */
1502 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1503 struct in6_rtmsg *rtmsg)
1505 memset(rtmsg, 0, sizeof(*rtmsg));
1507 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1508 rtmsg->rtmsg_src_len = r->rtm_src_len;
1509 rtmsg->rtmsg_flags = RTF_UP;
1510 if (r->rtm_type == RTN_UNREACHABLE)
1511 rtmsg->rtmsg_flags |= RTF_REJECT;
1513 if (rta[RTA_GATEWAY-1]) {
1514 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1516 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1517 rtmsg->rtmsg_flags |= RTF_GATEWAY;
/* dst/src prefixes: copy only the bytes the prefix length covers */
1519 if (rta[RTA_DST-1]) {
1520 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1522 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1524 if (rta[RTA_SRC-1]) {
1525 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1527 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1529 if (rta[RTA_OIF-1]) {
1530 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1532 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1534 if (rta[RTA_PRIORITY-1]) {
1535 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1537 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
/* RTM_DELROUTE netlink handler: convert the message and delete the route. */
1542 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1544 struct rtmsg *r = NLMSG_DATA(nlh);
1545 struct in6_rtmsg rtmsg;
1547 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1549 return ip6_route_del(&rtmsg, nlh, arg);
/*
 * inet6_rtm_newroute - rtnetlink handler for RTM_NEWROUTE.
 * Mirror of inet6_rtm_delroute(): parse the request, then add the route.
 * NOTE(review): the extract drops the failure return after the conversion
 * (original line 1558); tokens are byte-identical to the original.
 */
1552 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1554 	struct rtmsg *r = NLMSG_DATA(nlh);
1555 	struct in6_rtmsg rtmsg;
1557 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1559 	return ip6_route_add(&rtmsg, nlh, arg);
/*
 * rt6_rtnl_dump_arg - per-dump state handed to rt6_dump_route() while
 * walking the FIB for an RTM_GETROUTE dump: the skb being filled and the
 * netlink callback that owns the dump.
 */
1562 struct rt6_rtnl_dump_arg
1564 	struct sk_buff *skb;
1565 	struct netlink_callback *cb;
/*
 * rt6_fill_node - serialize one rt6_info into an rtnetlink message
 * (struct rtmsg + RTA_* attributes) appended to @skb.
 *
 * @dst/@src: when non-NULL, the specific addresses the route was resolved
 *            for (RTM_GETROUTE replies); otherwise the route's own prefixes
 *            are emitted.
 * @prefix:   non-zero means the caller only wants RTF_PREFIX_RT routes
 *            (prefix dumps); other routes are skipped as a "success".
 * On attribute overflow the partially written message is trimmed off the
 * skb (rtattr_failure path at the bottom).
 * NOTE(review): interior lines (returns, else branches, iif declaration,
 * braces) are missing from this extract; tokens are byte-identical.
 */
1568 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1569 			struct in6_addr *dst,
1570 			struct in6_addr *src,
1572 			int type, u32 pid, u32 seq,
1573 			struct nlmsghdr *in_nlh, int prefix)
1576 	struct nlmsghdr *nlh;
1577 	unsigned char *b = skb->tail;
1578 	struct rta_cacheinfo ci;
1580 	if (prefix) {	/* user wants prefix routes only */
1581 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1582 			/* success since this is not a prefix route */
/* Inherit the requester's pid when the caller didn't supply one. */
1587 	if (!pid && in_nlh) {
1588 		pid = in_nlh->nlmsg_pid;
1591 	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1592 	rtm = NLMSG_DATA(nlh);
1593 	rtm->rtm_family = AF_INET6;
1594 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
1595 	rtm->rtm_src_len = rt->rt6i_src.plen;
1597 	rtm->rtm_table = RT_TABLE_MAIN;
/* Route type: reject -> unreachable, loopback device -> local, else unicast. */
1598 	if (rt->rt6i_flags&RTF_REJECT)
1599 		rtm->rtm_type = RTN_UNREACHABLE;
1600 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1601 		rtm->rtm_type = RTN_LOCAL;
1603 		rtm->rtm_type = RTN_UNICAST;
1605 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
/* Protocol: redirect-learned, addrconf/kernel, or RA-installed defaults. */
1606 	rtm->rtm_protocol = rt->rt6i_protocol;
1607 	if (rt->rt6i_flags&RTF_DYNAMIC)
1608 		rtm->rtm_protocol = RTPROT_REDIRECT;
1609 	else if (rt->rt6i_flags & RTF_ADDRCONF)
1610 		rtm->rtm_protocol = RTPROT_KERNEL;
1611 	else if (rt->rt6i_flags&RTF_DEFAULT)
1612 		rtm->rtm_protocol = RTPROT_RA;
1614 	if (rt->rt6i_flags&RTF_CACHE)
1615 		rtm->rtm_flags |= RTM_F_CLONED;
/* Destination: exact address (len 128) for queries, else the prefix. */
1618 		RTA_PUT(skb, RTA_DST, 16, dst);
1619 		rtm->rtm_dst_len = 128;
1620 	} else if (rtm->rtm_dst_len)
1621 		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1622 #ifdef CONFIG_IPV6_SUBTREES
1624 		RTA_PUT(skb, RTA_SRC, 16, src);
1625 		rtm->rtm_src_len = 128;
1626 	} else if (rtm->rtm_src_len)
1627 		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
/* iif (query path) vs. preferred source address selection. */
1630 		RTA_PUT(skb, RTA_IIF, 4, &iif);
1632 		struct in6_addr saddr_buf;
1633 		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1634 			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1636 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1637 		goto rtattr_failure;
1638 	if (rt->u.dst.neighbour)
1639 		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1641 		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1642 	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
/* Cache info: ages/expiry converted from jiffies to clock ticks. */
1643 	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1644 	if (rt->rt6i_expires)
1645 		ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1648 	ci.rta_used = rt->u.dst.__use;
1649 	ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1650 	ci.rta_error = rt->u.dst.error;
1654 	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1655 	nlh->nlmsg_len = skb->tail - b;
/* nlmsg_failure / rtattr_failure: undo the partial message. */
1660 	skb_trim(skb, b - skb->data);
/*
 * rt6_dump_route - fib6 walker callback: emit one route into the dump skb.
 * Honors the RTM_F_PREFIX request flag (prefix-routes-only dumps) when the
 * request header is large enough to carry a struct rtmsg.
 * Returns rt6_fill_node()'s result; a negative value suspends the walk
 * (frame full).  NOTE(review): `prefix` declaration and the trailing
 * `prefix` argument line are missing from the extract.
 */
1664 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1666 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1669 	if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1670 		struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1671 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1675 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1676 		NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
/*
 * fib6_dump_node - walker hook: dump every route hanging off one fib6 leaf.
 * When rt6_dump_route() reports a full frame, the walker's leaf pointer is
 * left at the current route so the dump can resume there later.
 * NOTE(review): the suspend bookkeeping and return statements are missing
 * from this extract; tokens are byte-identical.
 */
1680 static int fib6_dump_node(struct fib6_walker_t *w)
1683 	struct rt6_info *rt;
1685 	for (rt = w->leaf; rt; rt = rt->u.next) {
1686 		res = rt6_dump_route(rt, w->args);
1688 			/* Frame is full, suspend walking */
/*
 * fib6_dump_end - tear down dump state stashed in the netlink callback:
 * unlink (and presumably free) the walker saved in cb->args[0] and restore
 * the original ->done handler from cb->args[1].
 * NOTE(review): kfree/NULL-check lines are missing from this extract.
 */
1698 static void fib6_dump_end(struct netlink_callback *cb)
1700 	struct fib6_walker_t *w = (void*)cb->args[0];
1704 		fib6_walker_unlink(w);
1708 	cb->done = (void*)cb->args[1];
/*
 * fib6_dump_done - ->done trampoline installed by inet6_dump_fib(): after
 * cleanup (fib6_dump_end, dropped from this extract) it chains to the
 * caller's original done callback.
 */
1713 static int fib6_dump_done(struct netlink_callback *cb)
1716 	return cb->done(cb);
/*
 * inet6_dump_fib - rtnetlink dump entry point for the IPv6 routing table.
 * First call: hook fib6_dump_done as the destructor, allocate and start a
 * fib6 walker over ip6_routing_table.  Subsequent calls: resume the saved
 * walker with fib6_walk_continue().  All tree access is under rt6_lock.
 * Returns skb->len while more data remains, or the (<=0) walk result.
 * NOTE(review): several branch/return lines are missing from this extract
 * (first-call test, ENOMEM path, walker start, cleanup); tokens are
 * byte-identical to the original.
 */
1719 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1721 	struct rt6_rtnl_dump_arg arg;
1722 	struct fib6_walker_t *w;
1728 	w = (void*)cb->args[0];
1732 		 * 1. hook callback destructor.
1734 		cb->args[1] = (long)cb->done;
1735 		cb->done = fib6_dump_done;
1738 		 * 2. allocate and initialize walker.
1740 		w = kmalloc(sizeof(*w), GFP_ATOMIC);
1743 		RT6_TRACE("dump<%p", w);
1744 		memset(w, 0, sizeof(*w));
1745 		w->root = &ip6_routing_table;
1746 		w->func = fib6_dump_node;
1748 		cb->args[0] = (long)w;
1749 		read_lock_bh(&rt6_lock);
1751 		read_unlock_bh(&rt6_lock);
/* Resume path: continue a previously suspended walk. */
1754 		read_lock_bh(&rt6_lock);
1755 		res = fib6_walk_continue(w);
1756 		read_unlock_bh(&rt6_lock);
1759 	if (res <= 0 && skb->len == 0)
1760 		RT6_TRACE("%p>dump end\n", w);
1762 	res = res < 0 ? res : skb->len;
1763 	/* res < 0 is an error. (really, impossible)
1764 	   res == 0 means that dump is complete, but skb still can contain data.
1765 	   res > 0 dump is not complete, but frame is full.
1767 	/* Destroy walker, if dump of this table is complete. */
/*
 * inet6_rtm_getroute - rtnetlink handler for a single RTM_GETROUTE query.
 * Builds a dummy skb and a flowi from the RTA_SRC/RTA_DST/RTA_IIF/RTA_OIF
 * attributes, resolves the route with ip6_route_output(), serializes it via
 * rt6_fill_node(), and unicasts the reply back to the requester.
 * NOTE(review): declarations (fl, iif, err), attribute-presence checks,
 * error paths and braces are missing from this extract; code tokens are
 * byte-identical to the original.
 */
1773 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1775 	struct rtattr **rta = arg;
1778 	struct sk_buff *skb;
1780 	struct rt6_info *rt;
1782 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1786 	/* Reserve room for dummy headers, this skb can pass
1787 	   through good chunk of routing engine.
1789 	skb->mac.raw = skb->data;
1790 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1792 	memset(&fl, 0, sizeof(fl));
1794 		ipv6_addr_copy(&fl.fl6_src,
1795 			(struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1797 		ipv6_addr_copy(&fl.fl6_dst,
1798 			(struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
/* Input-interface queries must name an existing device. */
1801 		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1804 		struct net_device *dev;
1805 		dev = __dev_get_by_index(iif);
1814 		memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1816 	rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
/* Attach the route so the skb owns a reference for the reply's lifetime. */
1818 	skb->dst = &rt->u.dst;
1820 	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1821 	err = rt6_fill_node(skb, rt,
1822 			&fl.fl6_dst, &fl.fl6_src,
1824 			RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1825 			nlh->nlmsg_seq, nlh, 0);
1831 	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
/*
 * inet6_rt_notify - broadcast a route change (@event, e.g. RTM_NEWROUTE /
 * RTM_DELROUTE) to RTMGRP_IPV6_ROUTE listeners.  On allocation or fill
 * failure the error is reported to the group via netlink_set_err().
 * gfp_any() picks GFP_ATOMIC/GFP_KERNEL depending on context.
 * NOTE(review): the early-return and kfree_skb lines on the failure paths
 * are missing from this extract; tokens are byte-identical.
 */
1841 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1843 	struct sk_buff *skb;
1844 	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1846 	skb = alloc_skb(size, gfp_any());
1848 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1851 	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1853 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1856 	NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1857 	netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1864 #ifdef CONFIG_PROC_FS
1866 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/*
 * rt6_info_route - format one route as a fixed-width /proc/net/ipv6_route
 * line (RT6_INFO_LEN bytes): dst prefix, src prefix (zeros without
 * CONFIG_IPV6_SUBTREES), next hop, metric, refcnt, use count, flags, and
 * device name, all in hex.  Skips entries before the requested offset and
 * stops once the supplied buffer length is reached.
 * NOTE(review): len accounting lines and braces between the hex loops are
 * missing from this extract; code tokens are byte-identical.
 */
1877 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1879 	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
/* Seek: skip whole RT6_INFO_LEN-sized records before the read offset. */
1882 	if (arg->skip < arg->offset / RT6_INFO_LEN) {
1887 	if (arg->len >= arg->length)
/* Destination address as 32 hex digits, then its prefix length. */
1890 	for (i=0; i<16; i++) {
1891 		sprintf(arg->buffer + arg->len, "%02x",
1892 			rt->rt6i_dst.addr.s6_addr[i]);
1895 	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1898 #ifdef CONFIG_IPV6_SUBTREES
1899 	for (i=0; i<16; i++) {
1900 		sprintf(arg->buffer + arg->len, "%02x",
1901 			rt->rt6i_src.addr.s6_addr[i]);
1904 	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
/* Without subtrees, source column is all zeros. */
1907 	sprintf(arg->buffer + arg->len,
1908 		"00000000000000000000000000000000 00 ");
/* Next hop: neighbour key when resolved, zeros otherwise. */
1912 	if (rt->rt6i_nexthop) {
1913 		for (i=0; i<16; i++) {
1914 			sprintf(arg->buffer + arg->len, "%02x",
1915 				rt->rt6i_nexthop->primary_key[i]);
1919 		sprintf(arg->buffer + arg->len,
1920 			"00000000000000000000000000000000");
1923 	arg->len += sprintf(arg->buffer + arg->len,
1924 		" %08x %08x %08x %08x %8s\n",
1925 		rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1926 		rt->u.dst.__use, rt->rt6i_flags,
1927 		rt->rt6i_dev ? rt->rt6i_dev->name : "");
/*
 * rt6_proc_info - legacy get_info read handler for /proc/net/ipv6_route.
 * Walks the routing table (read-locked) letting rt6_info_route() append
 * fixed-width records, then adjusts *start/len for the partial-record
 * offset within the first emitted entry.
 * NOTE(review): arg.skip/arg.len initialization and the final return are
 * missing from this extract; tokens are byte-identical.
 */
1931 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1933 	struct rt6_proc_arg arg;
1934 	arg.buffer = buffer;
1935 	arg.offset = offset;
1936 	arg.length = length;
1940 	read_lock_bh(&rt6_lock);
1941 	fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1942 	read_unlock_bh(&rt6_lock);
/* Trim the leading partial record implied by a non-aligned offset. */
1946 	*start += offset % RT6_INFO_LEN;
1948 	arg.len -= offset % RT6_INFO_LEN;
1950 	if (arg.len > length)
1958 extern struct rt6_statistics rt6_stats;
/*
 * rt6_stats_seq_show - emit the single line of /proc/net/rt6_stats:
 * fib node / route-node / alloc / entry / cache counters, live dst-entry
 * count, and discarded-route count, as space-separated hex fields.
 * NOTE(review): the trailing `return 0;` and brace are missing from this
 * extract; tokens are byte-identical.
 */
1960 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1962 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1963 		rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1964 		rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1965 		rt6_stats.fib_rt_cache,
1966 		atomic_read(&ip6_dst_ops.entries),
1967 		rt6_stats.fib_discarded_routes);
/* rt6_stats_seq_open - open() hook: bind the single-record show function. */
1972 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1974 	return single_open(file, rt6_stats_seq_show, NULL);
/*
 * File operations for /proc/net/rt6_stats (single_open-based seq file).
 * NOTE(review): the `.read = seq_read,` initializer (original line 1980)
 * is missing from this extract; remaining tokens are byte-identical.
 */
1977 static struct file_operations rt6_stats_seq_fops = {
1978 	.owner = THIS_MODULE,
1979 	.open = rt6_stats_seq_open,
1981 	.llseek = seq_lseek,
1982 	.release = single_release,
1984 #endif	/* CONFIG_PROC_FS */
1986 #ifdef CONFIG_SYSCTL
1988 static int flush_delay;
/*
 * ipv6_sysctl_rtcache_flush - handler for net.ipv6.route.flush: a write
 * parses the integer via proc_dointvec and, for non-negative delays,
 * triggers garbage collection with the written value.
 * NOTE(review): the write-only guard, negative-delay return and final
 * return are missing from this extract; tokens are byte-identical.
 */
1991 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1992 		void __user *buffer, size_t *lenp, loff_t *ppos)
1995 	proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1996 	if (flush_delay < 0)
1998 	fib6_run_gc((unsigned long)flush_delay);
/*
 * sysctl table for net.ipv6.route.*: flush trigger, GC tuning knobs
 * (thresh/max_size/intervals/timeout/elasticity), PMTU cache expiry and
 * minimum advertised MSS.  Time-valued entries use proc_dointvec_jiffies +
 * sysctl_jiffies so userspace reads/writes seconds while the kernel stores
 * jiffies.  NOTE(review): the `.mode = 0644/0200,` lines and entry braces
 * are missing from this extract; remaining tokens are byte-identical.
 */
2004 ctl_table ipv6_route_table[] = {
2006 		.ctl_name	= NET_IPV6_ROUTE_FLUSH,
2007 		.procname	= "flush",
2008 		.data		= &flush_delay,
2009 		.maxlen		= sizeof(int),
2011 		.proc_handler	= &ipv6_sysctl_rtcache_flush
2014 		.ctl_name	= NET_IPV6_ROUTE_GC_THRESH,
2015 		.procname	= "gc_thresh",
2016 		.data		= &ip6_dst_ops.gc_thresh,
2017 		.maxlen		= sizeof(int),
2019 		.proc_handler	= &proc_dointvec,
2022 		.ctl_name	= NET_IPV6_ROUTE_MAX_SIZE,
2023 		.procname	= "max_size",
2024 		.data		= &ip6_rt_max_size,
2025 		.maxlen		= sizeof(int),
2027 		.proc_handler	= &proc_dointvec,
2030 		.ctl_name	= NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2031 		.procname	= "gc_min_interval",
2032 		.data		= &ip6_rt_gc_min_interval,
2033 		.maxlen		= sizeof(int),
2035 		.proc_handler	= &proc_dointvec_jiffies,
2036 		.strategy	= &sysctl_jiffies,
2039 		.ctl_name	= NET_IPV6_ROUTE_GC_TIMEOUT,
2040 		.procname	= "gc_timeout",
2041 		.data		= &ip6_rt_gc_timeout,
2042 		.maxlen		= sizeof(int),
2044 		.proc_handler	= &proc_dointvec_jiffies,
2045 		.strategy	= &sysctl_jiffies,
2048 		.ctl_name	= NET_IPV6_ROUTE_GC_INTERVAL,
2049 		.procname	= "gc_interval",
2050 		.data		= &ip6_rt_gc_interval,
2051 		.maxlen		= sizeof(int),
2053 		.proc_handler	= &proc_dointvec_jiffies,
2054 		.strategy	= &sysctl_jiffies,
2057 		.ctl_name	= NET_IPV6_ROUTE_GC_ELASTICITY,
2058 		.procname	= "gc_elasticity",
2059 		.data		= &ip6_rt_gc_elasticity,
2060 		.maxlen		= sizeof(int),
2062 		.proc_handler	= &proc_dointvec_jiffies,
2063 		.strategy	= &sysctl_jiffies,
2066 		.ctl_name	= NET_IPV6_ROUTE_MTU_EXPIRES,
2067 		.procname	= "mtu_expires",
2068 		.data		= &ip6_rt_mtu_expires,
2069 		.maxlen		= sizeof(int),
2071 		.proc_handler	= &proc_dointvec_jiffies,
2072 		.strategy	= &sysctl_jiffies,
2075 		.ctl_name	= NET_IPV6_ROUTE_MIN_ADVMSS,
2076 		.procname	= "min_adv_mss",
2077 		.data		= &ip6_rt_min_advmss,
2078 		.maxlen		= sizeof(int),
2080 		.proc_handler	= &proc_dointvec_jiffies,
2081 		.strategy	= &sysctl_jiffies,
/*
 * ip6_route_init - boot-time initialization for IPv6 routing: create the
 * hardware-cacheline-aligned slab cache for rt6_info/dst entries (panics
 * on failure, since routing cannot work without it) and register the
 * /proc/net/ipv6_route and /proc/net/rt6_stats entries.
 * NOTE(review): fib6_init()/xfrm/fib rules registration lines that
 * followed in the original are missing from this extract.
 */
2088 void __init ip6_route_init(void)
2090 	struct proc_dir_entry *p;
2092 	ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2093 		sizeof(struct rt6_info),
2094 		0, SLAB_HWCACHE_ALIGN,
2096 	if (!ip6_dst_ops.kmem_cachep)
2097 		panic("cannot create ip6_dst_cache");
2100 #ifdef	CONFIG_PROC_FS
2101 	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2103 		p->owner = THIS_MODULE;
2105 	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
/*
 * ip6_route_cleanup - module-exit teardown mirroring ip6_route_init():
 * remove the two /proc/net entries and destroy the rt6_info slab cache.
 * NOTE(review): intermediate cleanup lines (e.g. fib6 teardown, original
 * lines 2117-2122) are missing from this extract; tokens byte-identical.
 */
2112 void __exit ip6_route_cleanup(void)
2114 #ifdef CONFIG_PROC_FS
2115 	proc_net_remove("ipv6_route");
2116 	proc_net_remove("rt6_stats");
2123 	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);