2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
53 #include <linux/rtnetlink.h>
57 #include <asm/uaccess.h>
60 #include <linux/sysctl.h>
/* Debug tracing helpers: RT6_TRACE is compiled to printk(KERN_DEBUG ...)
 * in the debug build, and to a no-op otherwise (both variants are visible
 * here; the #if/#else lines around them are missing from this extraction).
 */
63 /* Set to 3 to get tracing. */
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
71 #define RT6_TRACE(x...) do { ; } while (0)
/* Garbage-collection and PMTU tunables for the IPv6 routing cache.
 * Times are in jiffies (HZ = 1 second).  ip6_rt_gc_interval is non-static,
 * so presumably it is referenced from another file — verify before making
 * it static.  ip6_rt_min_advmss = min MTU minus 20 (TCP) minus 40 (IPv6). */
75 static int ip6_rt_max_size = 4096;
76 static int ip6_rt_gc_min_interval = HZ / 2;
77 static int ip6_rt_gc_timeout = 60*HZ;
78 int ip6_rt_gc_interval = 30*HZ;
79 static int ip6_rt_gc_elasticity = 9;
80 static int ip6_rt_mtu_expires = 10*60*HZ;
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
/* Forward declarations for the dst_ops callbacks and helpers below. */
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void ip6_dst_destroy(struct dst_entry *);
87 static int ip6_dst_gc(void);
89 static int ip6_pkt_discard(struct sk_buff *skb);
90 static void ip6_link_failure(struct sk_buff *skb);
91 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
/* Operations vector handed to the generic dst cache for IPv6 entries.
 * NOTE(review): some initializers (e.g. .gc) appear to be missing from
 * this extraction — compare against a pristine copy before editing. */
93 static struct dst_ops ip6_dst_ops = {
95 .protocol = __constant_htons(ETH_P_IPV6),
98 .check = ip6_dst_check,
99 .destroy = ip6_dst_destroy,
100 .negative_advice = ip6_negative_advice,
101 .link_failure = ip6_link_failure,
102 .update_pmtu = ip6_rt_update_pmtu,
103 .entry_size = sizeof(struct rt6_info),
/* The "no route" sentinel: a permanently-referenced reject route returned
 * by lookups that find nothing.  Input/output both discard the packet and
 * dst.error carries -ENETUNREACH back to the caller. */
106 struct rt6_info ip6_null_entry = {
109 .__refcnt = ATOMIC_INIT(1),
111 .dev = &loopback_dev,
113 .error = -ENETUNREACH,
114 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
115 .input = ip6_pkt_discard,
116 .output = ip6_pkt_discard,
118 .path = (struct dst_entry*)&ip6_null_entry,
121 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
122 .rt6i_metric = ~(u32) 0,
123 .rt6i_ref = ATOMIC_INIT(1),
/* Root of the IPv6 FIB radix tree; its leaf starts out as the null route. */
126 struct fib6_node ip6_routing_table = {
127 .leaf = &ip6_null_entry,
128 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
131 /* Protects all the ip6 fib */
133 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
136 /* allocate dst with ip6_dst_ops */
/* Thin wrapper over dst_alloc() that casts to rt6_info. */
137 static __inline__ struct rt6_info *ip6_dst_alloc(void)
139 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
/* dst_ops.destroy callback: drop the inet6_dev reference taken when the
 * route was created (in6_dev_get/in6_dev_hold elsewhere in this file). */
142 static void ip6_dst_destroy(struct dst_entry *dst)
144 struct rt6_info *rt = (struct rt6_info *)dst;
145 if (rt->rt6i_idev != NULL)
146 in6_dev_put(rt->rt6i_idev);
151 * Route lookup. Any rt6_lock is implied.
/* Walk the sibling list of a FIB leaf and pick the route whose device
 * matches the requested output interface (oif).  Falls back to the null
 * entry when nothing matches.  NOTE(review): the strict-mode and 'local'
 * fallback logic is partly missing from this extraction. */
154 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
158 struct rt6_info *local = NULL;
159 struct rt6_info *sprt;
162 for (sprt = rt; sprt; sprt = sprt->u.next) {
163 struct net_device *dev = sprt->rt6i_dev;
164 if (dev->ifindex == oif)
166 if (dev->flags&IFF_LOOPBACK)
174 return &ip6_null_entry;
180 * pointer to the last default router chosen. BH is disabled locally.
/* Last default router picked by rt6_best_dflt(), guarded by rt6_dflt_lock
 * so round-robin selection is stable across callers. */
182 static struct rt6_info *rt6_dflt_pointer;
183 static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
185 /* Default Router Selection (RFC 2461 6.3.6) */
/* Choose the best default router from a list of RTF_DEFAULT routes:
 * score each candidate by neighbour reachability state (the scoring
 * switch is partly missing from this extraction), prefer the previously
 * chosen router when it is still (probably) reachable, otherwise
 * round-robin from rt6_dflt_pointer; as a last resort scan the tree
 * root leaf for any RTF_DEFAULT route, and finally give up with the
 * null entry.  Caller holds rt6_lock; rt6_dflt_lock taken here. */
186 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
188 struct rt6_info *match = NULL;
189 struct rt6_info *sprt;
192 for (sprt = rt; sprt; sprt = sprt->u.next) {
193 struct neighbour *neigh;
198 sprt->rt6i_dev->ifindex == oif))
201 if (sprt == rt6_dflt_pointer)
204 if ((neigh = sprt->rt6i_nexthop) != NULL) {
205 read_lock_bh(&neigh->lock);
206 switch (neigh->nud_state) {
224 read_unlock_bh(&neigh->lock);
227 read_unlock_bh(&neigh->lock);
232 if (m > mpri || m >= 12) {
236 /* we choose the last default router if it
237 * is in (probably) reachable state.
238 * If route changed, we should do pmtu
239 * discovery. --yoshfuji
246 spin_lock(&rt6_dflt_lock);
249 * No default routers are known to be reachable.
/* Round-robin: resume scanning after the previously chosen router,
 * then wrap to the head of the list. */
252 if (rt6_dflt_pointer) {
253 for (sprt = rt6_dflt_pointer->u.next;
254 sprt; sprt = sprt->u.next) {
255 if (sprt->u.dst.obsolete <= 0 &&
256 sprt->u.dst.error == 0) {
263 sprt = sprt->u.next) {
264 if (sprt->u.dst.obsolete <= 0 &&
265 sprt->u.dst.error == 0) {
269 if (sprt == rt6_dflt_pointer)
276 if (rt6_dflt_pointer != match)
277 RT6_TRACE("changed default router: %p->%p\n",
278 rt6_dflt_pointer, match);
279 rt6_dflt_pointer = match;
281 spin_unlock(&rt6_dflt_lock);
285 * Last Resort: if no default routers found,
286 * use addrconf default route.
287 * We don't record this route.
289 for (sprt = ip6_routing_table.leaf;
290 sprt; sprt = sprt->u.next) {
291 if ((sprt->rt6i_flags & RTF_DEFAULT) &&
294 sprt->rt6i_dev->ifindex == oif))) {
300 /* no default route. give up. */
301 match = &ip6_null_entry;
/* Public lookup: find the best route for (daddr, saddr, oif) under the
 * read-side of rt6_lock.  Returns a route with an extra reference taken
 * (dst_hold) — the caller must dst_release() it.  On error the held
 * reference is dropped before returning (tail of function missing here). */
308 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
311 struct fib6_node *fn;
314 read_lock_bh(&rt6_lock);
315 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
316 rt = rt6_device_match(fn->leaf, oif, strict);
317 dst_hold(&rt->u.dst);
319 read_unlock_bh(&rt6_lock);
321 rt->u.dst.lastuse = jiffies;
322 if (rt->u.dst.error == 0)
324 dst_release(&rt->u.dst);
328 /* rt6_ins is called with FREE rt6_lock.
329 It takes new route entry, the addition fails by any reason the
330 route is freed. In any case, if caller does not hold it, it may
/* Insert a new route into the FIB under the write-side of rt6_lock.
 * Ownership of rt transfers to the tree on success; on failure fib6_add
 * is responsible for freeing it (per the comment above). */
334 static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
338 write_lock_bh(&rt6_lock);
339 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
340 write_unlock_bh(&rt6_lock);
345 /* No rt6_lock! If COW failed, the function returns dead route entry
346 with dst->error set to errno value.
/* Copy-on-write: clone a network route 'ort' into a host route (/128,
 * RTF_CACHE) for the specific destination, resolve its nexthop neighbour,
 * and insert it into the tree.  On allocation failure the held null entry
 * is returned instead (error path at the bottom). */
349 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
350 struct in6_addr *saddr)
359 rt = ip6_rt_copy(ort);
362 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
/* Non-gatewayed route: the destination itself is the nexthop. */
364 if (!(rt->rt6i_flags&RTF_GATEWAY))
365 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
367 rt->rt6i_dst.plen = 128;
368 rt->rt6i_flags |= RTF_CACHE;
369 rt->u.dst.flags |= DST_HOST;
371 #ifdef CONFIG_IPV6_SUBTREES
372 if (rt->rt6i_src.plen && saddr) {
373 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
374 rt->rt6i_src.plen = 128;
378 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
380 dst_hold(&rt->u.dst);
382 err = rt6_ins(rt, NULL, NULL);
386 rt->u.dst.error = err;
390 dst_hold(&ip6_null_entry.u.dst);
391 return &ip6_null_entry;
/* Lookup backtracking helper used by ip6_route_input/output: when a
 * strict-mode lookup dead-ends on the null entry, climb towards the tree
 * root and retry from the first ancestor that carries route info. */
394 #define BACKTRACK() \
395 if (rt == &ip6_null_entry && strict) { \
396 while ((fn = fn->parent) != NULL) { \
397 if (fn->fn_flags & RTN_ROOT) { \
398 dst_hold(&rt->u.dst); \
401 if (fn->fn_flags & RTN_RTINFO) \
/* Per-packet input routing: look up skb's destination in the FIB, prefer
 * an existing RTF_CACHE entry, otherwise COW a host route via rt6_cow().
 * The relookup loop handles the race where another CPU inserts the same
 * cache entry while rt6_lock is dropped (-EEXIST from rt6_ins).  The
 * chosen route is attached to skb->dst with a reference held. */
407 void ip6_route_input(struct sk_buff *skb)
409 struct fib6_node *fn;
/* Strict device matching for multicast/link-local destinations. */
414 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
417 read_lock_bh(&rt6_lock);
419 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
420 &skb->nh.ipv6h->saddr);
425 if ((rt->rt6i_flags & RTF_CACHE)) {
426 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
428 dst_hold(&rt->u.dst);
432 rt = rt6_device_match(rt, skb->dev->ifindex, 0);
435 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
436 read_unlock_bh(&rt6_lock);
438 rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
439 &skb->nh.ipv6h->saddr);
441 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
443 /* Race condition! In the gap, when rt6_lock was
444 released someone could insert this route. Relookup.
446 dst_release(&rt->u.dst);
449 dst_hold(&rt->u.dst);
452 read_unlock_bh(&rt6_lock);
454 rt->u.dst.lastuse = jiffies;
456 skb->dst = (struct dst_entry *) rt;
/* Output routing for locally generated traffic, keyed by the flow (fl).
 * Mirrors ip6_route_input but additionally runs RFC 2461 default router
 * selection (rt6_best_dflt) for addrconf-priority default routes.
 * Returns a dst with a reference held for the caller. */
459 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
461 struct fib6_node *fn;
466 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
469 read_lock_bh(&rt6_lock);
471 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
476 if ((rt->rt6i_flags & RTF_CACHE)) {
477 rt = rt6_device_match(rt, fl->oif, strict);
479 dst_hold(&rt->u.dst);
482 if (rt->rt6i_flags & RTF_DEFAULT) {
483 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
484 rt = rt6_best_dflt(rt, fl->oif);
486 rt = rt6_device_match(rt, fl->oif, strict);
490 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
491 read_unlock_bh(&rt6_lock);
493 rt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
495 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
498 /* Race condition! In the gap, when rt6_lock was
499 released someone could insert this route. Relookup.
501 dst_release(&rt->u.dst);
504 dst_hold(&rt->u.dst);
507 read_unlock_bh(&rt6_lock);
509 rt->u.dst.lastuse = jiffies;
516 * Destination cache support functions
/* dst_ops.check: a cached dst is still valid iff its FIB node's serial
 * number matches the cookie recorded when the dst was handed out. */
519 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
523 rt = (struct rt6_info *) dst;
525 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/* dst_ops.negative_advice: on bad feedback, evict cache clones outright. */
532 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
534 struct rt6_info *rt = (struct rt6_info *) dst;
537 if (rt->rt6i_flags & RTF_CACHE)
538 ip6_del_rt(rt, NULL, NULL);
/* dst_ops.link_failure: report address-unreachable to the sender, then
 * expire the cache clone immediately, or invalidate the whole FIB node
 * (sernum = -1) if the failing route is a default route. */
545 static void ip6_link_failure(struct sk_buff *skb)
549 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
551 rt = (struct rt6_info *) skb->dst;
553 if (rt->rt6i_flags&RTF_CACHE) {
554 dst_set_expires(&rt->u.dst, 0);
555 rt->rt6i_flags |= RTF_EXPIRES;
556 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
557 rt->rt6i_node->fn_sernum = -1;
/* dst_ops.update_pmtu: lower the cached MTU of a host route (/128 only),
 * clamping to IPV6_MIN_MTU (the clamp assignment line is missing from
 * this extraction), and mark the route RTF_MODIFIED. */
561 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
563 struct rt6_info *rt6 = (struct rt6_info*)dst;
565 if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
566 rt6->rt6i_flags |= RTF_MODIFIED;
567 if (mtu < IPV6_MIN_MTU)
569 dst->metrics[RTAX_MTU-1] = mtu;
573 /* Protected by rt6_lock. */
/* Singly linked list of dsts allocated for NDISC packets, reaped by
 * ndisc_dst_gc() below. */
574 static struct dst_entry *ndisc_dst_gc_list;
/* Allocate a standalone local dst for sending an NDISC message on 'dev'
 * towards 'addr'.  Resolves (or clones) the neighbour, links the dst onto
 * ndisc_dst_gc_list under rt6_lock, and kicks the FIB GC timer. */
576 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
577 struct neighbour *neigh,
578 struct in6_addr *addr,
579 int (*output)(struct sk_buff *))
581 struct rt6_info *rt = ip6_dst_alloc();
583 if (unlikely(rt == NULL))
590 neigh = ndisc_get_neigh(dev, addr);
593 rt->rt6i_idev = in6_dev_get(dev);
594 rt->rt6i_nexthop = neigh;
595 rt->rt6i_expires = 0;
596 rt->rt6i_flags = RTF_LOCAL;
598 atomic_set(&rt->u.dst.__refcnt, 1);
599 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
600 rt->u.dst.output = output;
602 write_lock_bh(&rt6_lock);
603 rt->u.dst.next = ndisc_dst_gc_list;
604 ndisc_dst_gc_list = &rt->u.dst;
605 write_unlock_bh(&rt6_lock);
607 fib6_force_start_gc();
610 return (struct dst_entry *)rt;
/* Reap unreferenced entries from ndisc_dst_gc_list.  NOTE(review): the
 * body after the refcount test (unlink/free and the *more accounting)
 * is missing from this extraction. */
613 int ndisc_dst_gc(int *more)
615 struct dst_entry *dst, *next, **pprev;
619 pprev = &ndisc_dst_gc_list;
621 while ((dst = *pprev) != NULL) {
622 if (!atomic_read(&dst->__refcnt)) {
/* dst cache garbage collector with an adaptive expiry: rate-limited by
 * ip6_rt_gc_min_interval unless the cache is over ip6_rt_max_size; the
 * expire horizon shrinks geometrically (by 1/2^elasticity) each pass and
 * resets when the cache drops below the gc threshold. */
635 static int ip6_dst_gc(void)
637 static unsigned expire = 30*HZ;
638 static unsigned long last_gc;
639 unsigned long now = jiffies;
641 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
642 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
648 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
649 expire = ip6_rt_gc_timeout>>1;
652 expire -= expire>>ip6_rt_gc_elasticity;
653 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
656 /* Clean host part of a prefix. Not necessary in radix tree,
657 but results in cleaner routing tables.
659 Remove it only when all the things will work!
/* Return the device's IPv6 MTU (cnf.mtu6), falling back to IPV6_MIN_MTU
 * when no inet6_dev exists.  The matching in6_dev_put is presumably on a
 * line missing from this extraction. */
662 static int ipv6_get_mtu(struct net_device *dev)
664 int mtu = IPV6_MIN_MTU;
665 struct inet6_dev *idev;
667 idev = in6_dev_get(dev);
669 mtu = idev->cnf.mtu6;
/* Derive the advertised MSS from a path MTU: subtract the IPv6 + TCP
 * header sizes, clamp below by ip6_rt_min_advmss and above per the
 * IPV6_MAXPLEN rationale in the comment below. */
675 static inline unsigned int ipv6_advmss(unsigned int mtu)
677 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
679 if (mtu < ip6_rt_min_advmss)
680 mtu = ip6_rt_min_advmss;
683 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
684 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
685 * IPV6_MAXPLEN is also valid and means: "any MSS,
686 * rely only on pmtu discovery"
688 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/* Return the device's configured hop limit, defaulting to the global
 * ipv6_devconf value when the device has no inet6_dev. */
693 static int ipv6_get_hoplimit(struct net_device *dev)
695 int hoplimit = ipv6_devconf.hop_limit;
696 struct inet6_dev *idev;
698 idev = in6_dev_get(dev);
700 hoplimit = idev->cnf.hop_limit;
/* Add a route described by an in6_rtmsg (ioctl path) or netlink message.
 * Validates prefix lengths, resolves the device and gateway (a gateway
 * must normally be link-local unicast; non-link-local gateways are
 * accepted only when they resolve to a direct, non-gatewayed route),
 * fills in metrics (hoplimit/MTU/advmss defaults), and inserts via
 * rt6_ins().  Error paths free rt with dst_free(). */
710 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
716 struct net_device *dev = NULL;
719 rta = (struct rtattr **) _rtattr;
721 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
723 #ifndef CONFIG_IPV6_SUBTREES
/* Source-routing prefixes are only supported with subtrees compiled in. */
724 if (rtmsg->rtmsg_src_len)
727 if (rtmsg->rtmsg_ifindex) {
728 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
733 if (rtmsg->rtmsg_metric == 0)
734 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
736 rt = ip6_dst_alloc();
741 rt->u.dst.obsolete = -1;
742 rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
743 if (nlh && (r = NLMSG_DATA(nlh))) {
744 rt->rt6i_protocol = r->rtm_protocol;
746 rt->rt6i_protocol = RTPROT_BOOT;
749 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
751 if (addr_type & IPV6_ADDR_MULTICAST)
752 rt->u.dst.input = ip6_mc_input;
754 rt->u.dst.input = ip6_forward;
756 rt->u.dst.output = ip6_output;
758 ipv6_addr_prefix(&rt->rt6i_dst.addr,
759 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
760 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
761 if (rt->rt6i_dst.plen == 128)
762 rt->u.dst.flags = DST_HOST;
764 #ifdef CONFIG_IPV6_SUBTREES
765 ipv6_addr_prefix(&rt->rt6i_src.addr,
766 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
767 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
770 rt->rt6i_metric = rtmsg->rtmsg_metric;
772 /* We cannot add true routes via loopback here,
773 they would result in kernel looping; promote them to reject routes
775 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
776 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
781 rt->u.dst.output = ip6_pkt_discard;
782 rt->u.dst.input = ip6_pkt_discard;
783 rt->u.dst.error = -ENETUNREACH;
784 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
788 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
789 struct in6_addr *gw_addr;
792 gw_addr = &rtmsg->rtmsg_gateway;
793 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
794 gwa_type = ipv6_addr_type(gw_addr);
796 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
797 struct rt6_info *grt;
799 /* IPv6 strictly inhibits using not link-local
800 addresses as nexthop address.
801 Otherwise, router will not able to send redirects.
802 It is very good, but in some (rare!) circumstances
803 (SIT, PtP, NBMA NOARP links) it is handy to allow
804 some exceptions. --ANK
807 if (!(gwa_type&IPV6_ADDR_UNICAST))
/* The gateway itself must be reachable via a direct route
 * on the same interface. */
810 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
816 if (dev != grt->rt6i_dev) {
817 dst_release(&grt->u.dst);
824 if (!(grt->rt6i_flags&RTF_GATEWAY))
826 dst_release(&grt->u.dst);
832 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
840 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
841 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
842 if (IS_ERR(rt->rt6i_nexthop)) {
843 err = PTR_ERR(rt->rt6i_nexthop);
844 rt->rt6i_nexthop = NULL;
849 rt->rt6i_flags = rtmsg->rtmsg_flags;
/* Copy caller-supplied metrics from the RTA_METRICS nest. */
852 if (rta && rta[RTA_METRICS-1]) {
853 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
854 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
856 while (RTA_OK(attr, attrlen)) {
857 unsigned flavor = attr->rta_type;
859 if (flavor > RTAX_MAX) {
863 rt->u.dst.metrics[flavor-1] =
864 *(u32 *)RTA_DATA(attr);
866 attr = RTA_NEXT(attr, attrlen);
/* Fill defaults for any metric the caller left at zero. */
870 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
871 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
872 rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
873 IPV6_DEFAULT_MCASTHOPS;
875 rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
876 ipv6_get_hoplimit(dev);
879 if (!rt->u.dst.metrics[RTAX_MTU-1])
880 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
881 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
882 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
884 rt->rt6i_idev = in6_dev_get(dev);
885 return rt6_ins(rt, nlh, _rtattr);
890 dst_free((struct dst_entry *) rt);
/* Delete a specific route from the FIB.  Clears the cached default-router
 * pointer (it may reference this route), drops the caller's reference,
 * then removes it from the tree under the write lock. */
894 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
898 write_lock_bh(&rt6_lock);
900 spin_lock_bh(&rt6_dflt_lock);
901 rt6_dflt_pointer = NULL;
902 spin_unlock_bh(&rt6_dflt_lock);
904 dst_release(&rt->u.dst);
906 err = fib6_del(rt, nlh, _rtattr);
907 write_unlock_bh(&rt6_lock);
/* Delete a route matching the rtmsg key: locate the exact prefix node,
 * then scan its route list for the entry matching ifindex, gateway and
 * metric (each criterion only checked if the caller supplied it).
 * Takes a reference before dropping rt6_lock, then defers to ip6_del_rt. */
912 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
914 struct fib6_node *fn;
918 read_lock_bh(&rt6_lock);
920 fn = fib6_locate(&ip6_routing_table,
921 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
922 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
925 for (rt = fn->leaf; rt; rt = rt->u.next) {
926 if (rtmsg->rtmsg_ifindex &&
927 (rt->rt6i_dev == NULL ||
928 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
930 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
931 ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
933 if (rtmsg->rtmsg_metric &&
934 rtmsg->rtmsg_metric != rt->rt6i_metric)
936 dst_hold(&rt->u.dst);
937 read_unlock_bh(&rt6_lock);
939 return ip6_del_rt(rt, nlh, _rtattr);
942 read_unlock_bh(&rt6_lock);
/* Handle an NDISC redirect for 'dest' arriving from 'saddr' via 'neigh'.
 * Validates that the redirect came from the current nexthop (or another
 * known default router), then installs a cloned RTF_CACHE host route
 * pointing at the new neighbour, replacing any old cache entry. */
950 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
951 struct neighbour *neigh, int on_link)
953 struct rt6_info *rt, *nrt;
955 /* Locate old route to this destination. */
956 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
961 if (neigh->dev != rt->rt6i_dev)
964 /* Redirect received -> path was valid.
965 Look, redirects are sent only in response to data packets,
966 so that this nexthop apparently is reachable. --ANK
968 dst_confirm(&rt->u.dst);
970 /* Duplicate redirect: silently ignore. */
971 if (neigh == rt->u.dst.neighbour)
974 /* Current route is on-link; redirect is always invalid.
976 Seems, previous statement is not true. It could
977 be node, which looks for us as on-link (f.e. proxy ndisc)
978 But then router serving it might decide, that we should
979 know truth 8)8) --ANK (980726).
981 if (!(rt->rt6i_flags&RTF_GATEWAY))
985 * RFC 2461 specifies that redirects should only be
986 * accepted if they come from the nexthop to the target.
987 * Due to the way default routers are chosen, this notion
988 * is a bit fuzzy and one might need to check all default
992 if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
993 if (rt->rt6i_flags & RTF_DEFAULT) {
994 struct rt6_info *rt1;
996 read_lock(&rt6_lock);
997 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
998 if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
999 dst_hold(&rt1->u.dst);
1000 dst_release(&rt->u.dst);
1001 read_unlock(&rt6_lock);
1006 read_unlock(&rt6_lock);
1008 if (net_ratelimit())
1009 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1010 "for redirect target\n");
1017 * We have finally decided to accept it.
1020 nrt = ip6_rt_copy(rt);
1024 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
/* on_link redirect: target is directly reachable, no gateway. */
1026 nrt->rt6i_flags &= ~RTF_GATEWAY;
1028 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1029 nrt->rt6i_dst.plen = 128;
1030 nrt->u.dst.flags |= DST_HOST;
1032 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1033 nrt->rt6i_nexthop = neigh_clone(neigh);
1034 /* Reset pmtu, it may be better */
1035 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1036 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
1038 if (rt6_ins(nrt, NULL, NULL))
1041 if (rt->rt6i_flags&RTF_CACHE) {
1042 ip6_del_rt(rt, NULL, NULL);
1047 dst_release(&rt->u.dst);
1052 * Handle ICMP "packet too big" messages
1053 * i.e. Path MTU discovery
/* Process a Packet Too Big report for (daddr, saddr) received on 'dev'.
 * Per RFC 1981, MTUs below IPV6_MIN_MTU are clamped up to the minimum.
 * An existing cache entry is updated in place with a 10-minute expiry;
 * otherwise a host route is created either by COW (connected route) or
 * by cloning (gatewayed/NONEXTHOP route). */
1056 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1057 struct net_device *dev, u32 pmtu)
1059 struct rt6_info *rt, *nrt;
1061 if (pmtu < IPV6_MIN_MTU) {
1062 if (net_ratelimit())
1063 printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1065 /* According to RFC1981, the PMTU is set to the IPv6 minimum
1066 link MTU if the node receives a Packet Too Big message
1067 reporting next-hop MTU that is less than the IPv6 minimum MTU.
1069 pmtu = IPV6_MIN_MTU;
1072 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
/* Only a decrease is ever applied. */
1077 if (pmtu >= dst_pmtu(&rt->u.dst))
1080 /* New mtu received -> path was valid.
1081 They are sent only in response to data packets,
1082 so that this nexthop apparently is reachable. --ANK
1084 dst_confirm(&rt->u.dst);
1086 /* Host route. If it is static, it would be better
1087 not to override it, but add new one, so that
1088 when cache entry will expire old pmtu
1089 would return automatically.
1091 if (rt->rt6i_flags & RTF_CACHE) {
1092 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1093 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1094 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1099 Two cases are possible:
1100 1. It is connected route. Action: COW
1101 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1103 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1104 nrt = rt6_cow(rt, daddr, saddr);
1105 if (!nrt->u.dst.error) {
1106 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1107 /* According to RFC 1981, detecting PMTU increase shouldn't be
1108 happened within 5 mins, the recommended timer is 10 mins.
1109 Here this route expiration time is set to ip6_rt_mtu_expires
1110 which is 10 mins. After 10 mins the decreased pmtu is expired
1111 and detecting PMTU increase will be automatically happened.
1113 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1114 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1116 dst_release(&nrt->u.dst);
1118 nrt = ip6_rt_copy(rt);
1121 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1122 nrt->rt6i_dst.plen = 128;
1123 nrt->u.dst.flags |= DST_HOST;
1124 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1125 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1126 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1127 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1128 rt6_ins(nrt, NULL, NULL);
1132 dst_release(&rt->u.dst);
1136 * Misc support functions
/* Allocate a new rt6_info and copy the routing-relevant fields of 'ort'
 * into it: handlers, metrics, device (+ref), inet6_dev (+ref), gateway,
 * flags (minus RTF_EXPIRES) and destination/source keys.  The copy gets
 * metric 0 and no expiry; nexthop is NOT copied. */
1139 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1141 struct rt6_info *rt = ip6_dst_alloc();
1144 rt->u.dst.input = ort->u.dst.input;
1145 rt->u.dst.output = ort->u.dst.output;
1147 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1148 rt->u.dst.dev = ort->u.dst.dev;
1150 dev_hold(rt->u.dst.dev);
1151 rt->rt6i_idev = ort->rt6i_idev;
1153 in6_dev_hold(rt->rt6i_idev);
1154 rt->u.dst.lastuse = jiffies;
1155 rt->rt6i_expires = 0;
1157 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1158 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1159 rt->rt6i_metric = 0;
1161 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1162 #ifdef CONFIG_IPV6_SUBTREES
1163 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
/* Find the default route through gateway 'addr' on 'dev' by scanning the
 * tree root's leaf list; returns it with a reference held, or NULL.
 * NOTE(review): taking the write lock for a pure lookup looks odd but is
 * the historical behavior here — confirm before changing to read_lock. */
1169 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1171 struct rt6_info *rt;
1172 struct fib6_node *fn;
1174 fn = &ip6_routing_table;
1176 write_lock_bh(&rt6_lock);
1177 for (rt = fn->leaf; rt; rt=rt->u.next) {
1178 if (dev == rt->rt6i_dev &&
1179 ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1183 dst_hold(&rt->u.dst);
1184 write_unlock_bh(&rt6_lock);
/* Install an addrconf default route via 'gwaddr' on 'dev' (metric 1024)
 * and return the freshly inserted entry via rt6_get_dflt_router(). */
1188 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1189 struct net_device *dev)
1191 struct in6_rtmsg rtmsg;
1193 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1194 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1195 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1196 rtmsg.rtmsg_metric = 1024;
1197 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1199 rtmsg.rtmsg_ifindex = dev->ifindex;
1201 ip6_route_add(&rtmsg, NULL, NULL);
1202 return rt6_get_dflt_router(gwaddr, dev);
/* Remove all default routes matching the flag set (RTF_ALLONLINK for the
 * last-resort case, otherwise addrconf defaults).  Each deletion drops
 * rt6_lock and calls ip6_del_rt, then the scan restarts (restart jump is
 * on a line missing from this extraction). */
1205 void rt6_purge_dflt_routers(int last_resort)
1207 struct rt6_info *rt;
1211 flags = RTF_ALLONLINK;
1213 flags = RTF_DEFAULT | RTF_ADDRCONF;
1216 read_lock_bh(&rt6_lock);
1217 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1218 if (rt->rt6i_flags & flags) {
1219 dst_hold(&rt->u.dst);
1221 spin_lock_bh(&rt6_dflt_lock);
1222 rt6_dflt_pointer = NULL;
1223 spin_unlock_bh(&rt6_dflt_lock);
1225 read_unlock_bh(&rt6_lock);
1227 ip6_del_rt(rt, NULL, NULL);
1232 read_unlock_bh(&rt6_lock);
/* SIOCADDRT/SIOCDELRT ioctl entry point: requires CAP_NET_ADMIN, copies
 * the in6_rtmsg from userspace, then dispatches to ip6_route_add/del.
 * NOTE(review): the copy_from_user error check and locking around the
 * add/del calls are on lines missing from this extraction. */
1235 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1237 struct in6_rtmsg rtmsg;
1241 case SIOCADDRT: /* Add a route */
1242 case SIOCDELRT: /* Delete a route */
1243 if (!capable(CAP_NET_ADMIN))
1245 err = copy_from_user(&rtmsg, arg,
1246 sizeof(struct in6_rtmsg));
1253 err = ip6_route_add(&rtmsg, NULL, NULL);
1256 err = ip6_route_del(&rtmsg, NULL, NULL);
1270 * Drop the packet on the floor
/* Handler for reject/null routes: count it as a no-route event and send
 * ICMPv6 destination-unreachable (no route) back to the sender. */
1273 int ip6_pkt_discard(struct sk_buff *skb)
1275 IP6_INC_STATS(OutNoRoutes);
1276 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
/* Install the local host route (/128 via loopback) for a newly configured
 * address.  'anycast' presumably suppresses RTF_LOCAL (the conditional is
 * on a line missing from this extraction — verify).  Frees the dst if the
 * nexthop neighbour cannot be created. */
1285 int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast)
1287 struct rt6_info *rt = ip6_dst_alloc();
1292 dev_hold(&loopback_dev);
1294 rt->u.dst.flags = DST_HOST;
1295 rt->u.dst.input = ip6_input;
1296 rt->u.dst.output = ip6_output;
1297 rt->rt6i_dev = &loopback_dev;
1298 rt->rt6i_idev = in6_dev_get(&loopback_dev);
1299 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1300 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
1301 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
1302 rt->u.dst.obsolete = -1;
1304 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1306 rt->rt6i_flags |= RTF_LOCAL;
1307 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1308 if (rt->rt6i_nexthop == NULL) {
1309 dst_free((struct dst_entry *) rt);
1313 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1314 rt->rt6i_dst.plen = 128;
1315 rt6_ins(rt, NULL, NULL);
1320 /* Delete address. Warning: you should check that this address
1321 disappeared before calling this function.
/* Remove the local /128 host route for a deleted address by looking it
 * up via loopback and deleting it if it really is a host route. */
1324 int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
1326 struct rt6_info *rt;
1329 rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
1331 if (rt->rt6i_dst.plen == 128)
1332 err = ip6_del_rt(rt, NULL, NULL);
1334 dst_release(&rt->u.dst);
/* fib6_clean_tree callback: select for deletion every route on the given
 * device (or every route when arg == NULL), except the null entry. */
1340 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1342 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1343 rt != &ip6_null_entry) {
1344 RT6_TRACE("deleted by ifdown %p\n", rt);
/* Purge all routes through 'dev' when the interface goes down. */
1350 void rt6_ifdown(struct net_device *dev)
1352 write_lock_bh(&rt6_lock);
1353 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1354 write_unlock_bh(&rt6_lock);
/* Argument bundle for rt6_mtu_change_route (the mtu member is on a line
 * missing from this extraction). */
1357 struct rt6_mtu_change_arg
1359 struct net_device *dev;
/* fib6_clean_tree callback for device MTU changes: update each route's
 * RTAX_MTU (unless locked) when the new MTU is a decrease, or when it is
 * an increase and the route's current PMTU equalled the device MTU (i.e.
 * the device was the path bottleneck).  ADVMSS is recomputed to match. */
1363 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1365 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1366 struct inet6_dev *idev;
1368 /* In IPv6 pmtu discovery is not optional,
1369 so that RTAX_MTU lock cannot disable it.
1370 We still use this lock to block changes
1371 caused by addrconf/ndisc.
1374 idev = __in6_dev_get(arg->dev);
1378 /* For administrative MTU increase, there is no way to discover
1379 IPv6 PMTU increase, so PMTU increase should be updated here.
1380 Since RFC 1981 doesn't include administrative MTU increase
1381 update PMTU increase is a MUST. (i.e. jumbo frame)
1384 If new MTU is less than route PMTU, this new MTU will be the
1385 lowest MTU in the path, update the route PMTU to reflect PMTU
1386 decreases; if new MTU is greater than route PMTU, and the
1387 old MTU is the lowest MTU in the path, update the route PMTU
1388 to reflect the increase. In this case if the other nodes' MTU
1389 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1392 if (rt->rt6i_dev == arg->dev &&
1393 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1394 (dst_pmtu(&rt->u.dst) > arg->mtu ||
1395 (dst_pmtu(&rt->u.dst) < arg->mtu &&
1396 dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
1397 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1398 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
/* Walk the whole FIB and apply the MTU change to routes on 'dev'.
 * (The arg.dev/arg.mtu assignments are on lines missing here.) */
1402 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1404 struct rt6_mtu_change_arg arg;
1408 read_lock_bh(&rt6_lock);
1409 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1410 read_unlock_bh(&rt6_lock);
/* Translate a netlink rtmsg + attribute array into the legacy in6_rtmsg
 * form used by ip6_route_add/del.  Each attribute is length-checked
 * before copying; RTN_UNREACHABLE maps to RTF_REJECT. */
1413 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1414 struct in6_rtmsg *rtmsg)
1416 memset(rtmsg, 0, sizeof(*rtmsg));
1418 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1419 rtmsg->rtmsg_src_len = r->rtm_src_len;
1420 rtmsg->rtmsg_flags = RTF_UP;
1421 if (r->rtm_type == RTN_UNREACHABLE)
1422 rtmsg->rtmsg_flags |= RTF_REJECT;
1424 if (rta[RTA_GATEWAY-1]) {
1425 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1427 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1428 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1430 if (rta[RTA_DST-1]) {
1431 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1433 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1435 if (rta[RTA_SRC-1]) {
1436 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1438 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1440 if (rta[RTA_OIF-1]) {
1441 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1443 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1445 if (rta[RTA_PRIORITY-1]) {
1446 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1448 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
/* Netlink RTM_DELROUTE handler: convert the message and delete. */
1453 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1455 struct rtmsg *r = NLMSG_DATA(nlh);
1456 struct in6_rtmsg rtmsg;
1458 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1460 return ip6_route_del(&rtmsg, nlh, arg);
/* Netlink RTM_NEWROUTE handler: convert the message and add. */
1463 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1465 struct rtmsg *r = NLMSG_DATA(nlh);
1466 struct in6_rtmsg rtmsg;
1468 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1470 return ip6_route_add(&rtmsg, nlh, arg);
/* State carried through a netlink route-table dump walk. */
1473 struct rt6_rtnl_dump_arg
1475 struct sk_buff *skb;
1476 struct netlink_callback *cb;
/* Serialize one rt6_info into a netlink RTM message on 'skb': rtmsg
 * header, DST/SRC (forced to /128 when explicit addresses are given),
 * IIF/PREFSRC, metrics, gateway, OIF, priority and cacheinfo.  When
 * 'prefix' is set only RTF_PREFIX_RT routes are emitted.  On overflow
 * the partial message is trimmed off and a failure is returned
 * (nlmsg_failure/rtattr_failure labels are on lines missing here). */
1479 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1480 struct in6_addr *dst,
1481 struct in6_addr *src,
1483 int type, u32 pid, u32 seq,
1484 struct nlmsghdr *in_nlh, int prefix)
1487 struct nlmsghdr *nlh;
1488 unsigned char *b = skb->tail;
1489 struct rta_cacheinfo ci;
1491 if (prefix) { /* user wants prefix routes only */
1492 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1493 /* success since this is not a prefix route */
1498 if (!pid && in_nlh) {
1499 pid = in_nlh->nlmsg_pid;
1502 nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1503 rtm = NLMSG_DATA(nlh);
1504 rtm->rtm_family = AF_INET6;
1505 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1506 rtm->rtm_src_len = rt->rt6i_src.plen;
1508 rtm->rtm_table = RT_TABLE_MAIN;
1509 if (rt->rt6i_flags&RTF_REJECT)
1510 rtm->rtm_type = RTN_UNREACHABLE;
1511 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1512 rtm->rtm_type = RTN_LOCAL;
1514 rtm->rtm_type = RTN_UNICAST;
1516 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1517 rtm->rtm_protocol = rt->rt6i_protocol;
1518 if (rt->rt6i_flags&RTF_DYNAMIC)
1519 rtm->rtm_protocol = RTPROT_REDIRECT;
1520 else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1521 rtm->rtm_protocol = RTPROT_KERNEL;
1522 else if (rt->rt6i_flags&RTF_DEFAULT)
1523 rtm->rtm_protocol = RTPROT_RA;
1525 if (rt->rt6i_flags&RTF_CACHE)
1526 rtm->rtm_flags |= RTM_F_CLONED;
1529 RTA_PUT(skb, RTA_DST, 16, dst);
1530 rtm->rtm_dst_len = 128;
1531 } else if (rtm->rtm_dst_len)
1532 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1533 #ifdef CONFIG_IPV6_SUBTREES
1535 RTA_PUT(skb, RTA_SRC, 16, src);
1536 rtm->rtm_src_len = 128;
1537 } else if (rtm->rtm_src_len)
1538 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1541 RTA_PUT(skb, RTA_IIF, 4, &iif);
1543 struct in6_addr saddr_buf;
1544 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1545 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1547 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1548 goto rtattr_failure;
1549 if (rt->u.dst.neighbour)
1550 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1552 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1553 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1554 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1555 if (rt->rt6i_expires)
1556 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1559 ci.rta_used = rt->u.dst.__use;
1560 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1561 ci.rta_error = rt->u.dst.error;
1565 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1566 nlh->nlmsg_len = skb->tail - b;
1571 skb_trim(skb, b - skb->data);
/*
 * Per-route callback for the fib6 dump walk: emit @rt into the dump skb.
 * Honors RTM_F_PREFIX in the original request (when the request carried
 * a full rtmsg) so userspace can ask for prefix routes only.
 */
1575 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1577 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1580 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1581 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1582 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1586 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1587 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
/*
 * fib6 walker callback: dump every route chained off the current leaf.
 * A negative rt6_dump_route() result means the skb is full; the walk is
 * suspended so it can resume at this route on the next dump call.
 */
1591 static int fib6_dump_node(struct fib6_walker_t *w)
1594 struct rt6_info *rt;
1596 for (rt = w->leaf; rt; rt = rt->u.next) {
1597 res = rt6_dump_route(rt, w->args);
1599 /* Frame is full, suspend walking */
/*
 * Tear down dump state: unlink the walker stashed in cb->args[0] and
 * restore the original done callback saved in cb->args[1].
 */
1609 static void fib6_dump_end(struct netlink_callback *cb)
1611 struct fib6_walker_t *w = (void*)cb->args[0];
1615 fib6_walker_unlink(w);
1619 cb->done = (void*)cb->args[1];	/* restore the destructor hooked in inet6_dump_fib() */
/*
 * Dump destructor installed on cb->done: clean up walker state, then
 * chain to the original done callback (restored by fib6_dump_end()).
 */
1624 static int fib6_dump_done(struct netlink_callback *cb)
1627 return cb->done(cb);
/*
 * Netlink dump entry point for the IPv6 FIB.  First call allocates and
 * registers a fib6 walker (persisted across calls in cb->args[0]);
 * subsequent calls resume the suspended walk.  rt6_lock is held only
 * around the tree traversal itself.
 * NOTE(review): extraction dropped lines from this function (the
 * first-call/resume branch structure, fib6_start_gc-era walker setup,
 * error returns and the final fib6_dump_end() call) -- verify against
 * the complete file.
 */
1630 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1632 struct rt6_rtnl_dump_arg arg;
1633 struct fib6_walker_t *w;
1639 w = (void*)cb->args[0];	/* non-NULL => a previous call was suspended */
1643 * 1. hook callback destructor.
1645 cb->args[1] = (long)cb->done;	/* save original destructor */
1646 cb->done = fib6_dump_done;
1649 * 2. allocate and initialize walker.
1651 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1654 RT6_TRACE("dump<%p", w);
1655 memset(w, 0, sizeof(*w));
1656 w->root = &ip6_routing_table;
1657 w->func = fib6_dump_node;
1659 cb->args[0] = (long)w;	/* persist walker across dump calls */
1660 read_lock_bh(&rt6_lock);
1662 read_unlock_bh(&rt6_lock);
/* Resume path: continue a previously suspended walk. */
1665 read_lock_bh(&rt6_lock);
1666 res = fib6_walk_continue(w);
1667 read_unlock_bh(&rt6_lock);
1670 if (res <= 0 && skb->len == 0)
1671 RT6_TRACE("%p>dump end\n", w);
1673 res = res < 0 ? res : skb->len;
1674 /* res < 0 is an error. (really, impossible)
1675 res == 0 means that dump is complete, but skb still can contain data.
1676 res > 0 dump is not complete, but frame is full.
1678 /* Destroy walker, if dump of this table is complete. */
/*
 * RTM_GETROUTE handler: build a flow from the request's RTA_SRC/RTA_DST/
 * RTA_IIF/RTA_OIF attributes, resolve it through ip6_route_output(), and
 * unicast the resulting route description back to the requester.
 * NOTE(review): extraction dropped lines here (declarations of fl/iif/
 * err, attribute-presence checks, device/error handling around
 * __dev_get_by_index(), skb frees and returns) -- verify against the
 * complete file.
 */
1684 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1686 struct rtattr **rta = arg;	/* parsed attribute table, indexed by RTA_*-1 */
1689 struct sk_buff *skb;
1691 struct rt6_info *rt;
1693 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1697 /* Reserve room for dummy headers, this skb can pass
1698 through good chunk of routing engine.
1700 skb->mac.raw = skb->data;
1701 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1703 memset(&fl, 0, sizeof(fl));
1705 ipv6_addr_copy(&fl.fl6_src,
1706 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1708 ipv6_addr_copy(&fl.fl6_dst,
1709 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1712 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
/* Input-interface lookup: resolve ifindex to a device. */
1715 struct net_device *dev;
1716 dev = __dev_get_by_index(iif);
1725 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1727 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1729 skb->dst = &rt->u.dst;	/* reply skb takes the route reference */
1731 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
/* Fill with full /128 lookup addresses rather than route prefixes. */
1732 err = rt6_fill_node(skb, rt,
1733 &fl.fl6_dst, &fl.fl6_src,
1735 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1736 nlh->nlmsg_seq, nlh, 0);
1742 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
/*
 * Broadcast a route change (@event, e.g. RTM_NEWROUTE/RTM_DELROUTE) to
 * the RTMGRP_IPV6_ROUTE multicast group.  Allocation or fill failures
 * are reported to listeners via netlink_set_err() instead of silently
 * dropping the notification.
 */
1752 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1754 struct sk_buff *skb;
1755 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);	/* header + generous attr room */
1757 skb = alloc_skb(size, gfp_any());	/* gfp_any(): callable from softirq or process context */
1759 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1762 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1764 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1767 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1768 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1775 #ifdef CONFIG_PROC_FS
/* Fixed width of one /proc/net/ipv6_route line:
 * dst(32) + " NN " + src(32) + " NN " + nexthop(32) + 4 hex words + dev + '\n'. */
1777 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/*
 * fib6 walk callback producing one fixed-width /proc/net/ipv6_route
 * line per route into arg->buffer.  Relies on every line being exactly
 * RT6_INFO_LEN bytes so rt6_proc_info() can seek by simple division.
 * NOTE(review): extraction dropped the skip/length early-return bodies,
 * the arg->len updates inside the hex loops and the else-branches --
 * verify against the complete file.
 */
1788 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1790 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1793 if (arg->skip < arg->offset / RT6_INFO_LEN) {	/* still seeking to the requested offset */
1798 if (arg->len >= arg->length)	/* output buffer exhausted */
/* Destination prefix as 32 hex digits, then its prefix length. */
1801 for (i=0; i<16; i++) {
1802 sprintf(arg->buffer + arg->len, "%02x",
1803 rt->rt6i_dst.addr.s6_addr[i]);
1806 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1809 #ifdef CONFIG_IPV6_SUBTREES
/* Source prefix (only meaningful with subtrees support). */
1810 for (i=0; i<16; i++) {
1811 sprintf(arg->buffer + arg->len, "%02x",
1812 rt->rt6i_src.addr.s6_addr[i]);
1815 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1818 sprintf(arg->buffer + arg->len,
1819 "00000000000000000000000000000000 00 ");
/* Next hop: neighbour key when present, all-zeros placeholder otherwise. */
1823 if (rt->rt6i_nexthop) {
1824 for (i=0; i<16; i++) {
1825 sprintf(arg->buffer + arg->len, "%02x",
1826 rt->rt6i_nexthop->primary_key[i]);
1830 sprintf(arg->buffer + arg->len,
1831 "00000000000000000000000000000000");
/* Trailing columns: metric, refcount, use count, flags, device name. */
1834 arg->len += sprintf(arg->buffer + arg->len,
1835 " %08x %08x %08x %08x %8s\n",
1836 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1837 rt->u.dst.__use, rt->rt6i_flags,
1838 rt->rt6i_dev ? rt->rt6i_dev->name : "");
/*
 * Legacy procfs read handler for /proc/net/ipv6_route.  Walks the whole
 * routing table under rt6_lock, letting rt6_info_route() skip complete
 * RT6_INFO_LEN-sized lines before @offset, then adjusts *start/len for
 * a partial first line.
 * NOTE(review): initialization of arg.skip/arg.len and the final
 * return were dropped in extraction -- verify against the complete file.
 */
1842 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1844 struct rt6_proc_arg arg;
1845 arg.buffer = buffer;
1846 arg.offset = offset;
1847 arg.length = length;
1851 read_lock_bh(&rt6_lock);
1852 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1853 read_unlock_bh(&rt6_lock);
/* Account for a read offset that lands mid-line. */
1857 *start += offset % RT6_INFO_LEN;
1859 arg.len -= offset % RT6_INFO_LEN;
1861 if (arg.len > length)
1869 extern struct rt6_statistics rt6_stats;
/*
 * seq_file show handler for /proc/net/rt6_stats: one line of seven hex
 * counters (fib nodes, route nodes, allocs, entries, cache entries,
 * live dst entries, discarded routes).
 */
1871 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1873 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1874 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1875 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1876 rt6_stats.fib_rt_cache,
1877 atomic_read(&ip6_dst_ops.entries),
1878 rt6_stats.fib_discarded_routes);
/* open() for /proc/net/rt6_stats: single-shot seq_file, no iterator state. */
1883 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1885 return single_open(file, rt6_stats_seq_show, NULL);
/*
 * file_operations for /proc/net/rt6_stats (single_open seq_file).
 * NOTE(review): the `.read = seq_read,` initializer expected between
 * .open and .llseek appears to have been lost in extraction -- confirm
 * against the complete file.
 */
1888 static struct file_operations rt6_stats_seq_fops = {
1889 .owner = THIS_MODULE,
1890 .open = rt6_stats_seq_open,
1892 .llseek = seq_lseek,
1893 .release = single_release,
1897 #ifdef CONFIG_SYSCTL
/* Value written to net.ipv6.route.flush; consumed once per write below. */
1899 static int flush_delay;
/*
 * sysctl handler for net.ipv6.route.flush: on write, parse the integer
 * and trigger a garbage-collection pass over the routing table with the
 * requested delay.  Negative values are handled specially (branch body
 * elided in this extraction -- confirm against the complete file).
 */
1902 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1903 void __user *buffer, size_t *lenp)
1906 proc_dointvec(ctl, write, filp, buffer, lenp);
1907 if (flush_delay < 0)
1909 fib6_run_gc((unsigned long)flush_delay);
/*
 * sysctl table for net.ipv6.route.*: GC tuning knobs, table size limits
 * and the write-only "flush" trigger.  Jiffies-valued entries use
 * proc_dointvec_jiffies so userspace reads/writes seconds.
 * NOTE(review): gc_elasticity (a ratio) and min_adv_mss (bytes) are not
 * time values, yet use proc_dointvec_jiffies/sysctl_jiffies here -- this
 * looks like a copy-paste wart inherited from the jiffies entries above;
 * confirm intent before changing (later kernels switched these to plain
 * proc_dointvec).
 */
1915 ctl_table ipv6_route_table[] = {
/* Write-only trigger: writing N schedules a route-cache flush (see
 * ipv6_sysctl_rtcache_flush above). */
1917 .ctl_name = NET_IPV6_ROUTE_FLUSH,
1918 .procname = "flush",
1919 .data = &flush_delay,
1920 .maxlen = sizeof(int),
1922 .proc_handler = &ipv6_sysctl_rtcache_flush
/* GC kicks in once this many dst entries exist. */
1925 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
1926 .procname = "gc_thresh",
1927 .data = &ip6_dst_ops.gc_thresh,
1928 .maxlen = sizeof(int),
1930 .proc_handler = &proc_dointvec,
/* Hard cap on routing-cache size. */
1933 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
1934 .procname = "max_size",
1935 .data = &ip6_rt_max_size,
1936 .maxlen = sizeof(int),
1938 .proc_handler = &proc_dointvec,
/* Minimum interval between GC runs (jiffies internally, seconds in /proc). */
1941 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
1942 .procname = "gc_min_interval",
1943 .data = &ip6_rt_gc_min_interval,
1944 .maxlen = sizeof(int),
1946 .proc_handler = &proc_dointvec_jiffies,
1947 .strategy = &sysctl_jiffies,
/* Age after which an unused cached route may be collected. */
1950 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
1951 .procname = "gc_timeout",
1952 .data = &ip6_rt_gc_timeout,
1953 .maxlen = sizeof(int),
1955 .proc_handler = &proc_dointvec_jiffies,
1956 .strategy = &sysctl_jiffies,
/* Period of the background GC timer. */
1959 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
1960 .procname = "gc_interval",
1961 .data = &ip6_rt_gc_interval,
1962 .maxlen = sizeof(int),
1964 .proc_handler = &proc_dointvec_jiffies,
1965 .strategy = &sysctl_jiffies,
/* See NOTE(review) above: not actually a jiffies value. */
1968 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
1969 .procname = "gc_elasticity",
1970 .data = &ip6_rt_gc_elasticity,
1971 .maxlen = sizeof(int),
1973 .proc_handler = &proc_dointvec_jiffies,
1974 .strategy = &sysctl_jiffies,
/* Lifetime of learned per-destination PMTU information. */
1977 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
1978 .procname = "mtu_expires",
1979 .data = &ip6_rt_mtu_expires,
1980 .maxlen = sizeof(int),
1982 .proc_handler = &proc_dointvec_jiffies,
1983 .strategy = &sysctl_jiffies,
/* See NOTE(review) above: byte count, not a jiffies value. */
1986 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
1987 .procname = "min_adv_mss",
1988 .data = &ip6_rt_min_advmss,
1989 .maxlen = sizeof(int),
1991 .proc_handler = &proc_dointvec_jiffies,
1992 .strategy = &sysctl_jiffies,
/*
 * Boot-time initialization of the IPv6 routing subsystem: create the
 * rt6_info slab cache (fatal on failure -- routing cannot work without
 * it) and register the /proc/net entries.
 * NOTE(review): the fib6_init()/trailing brace lines were dropped in
 * extraction -- confirm against the complete file.
 */
1999 void __init ip6_route_init(void)
2001 struct proc_dir_entry *p;
2003 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2004 sizeof(struct rt6_info),
2005 0, SLAB_HWCACHE_ALIGN,
2007 if (!ip6_dst_ops.kmem_cachep)
2008 panic("cannot create ip6_dst_cache");	/* unrecoverable at boot */
2011 #ifdef CONFIG_PROC_FS
2012 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2014 p->owner = THIS_MODULE;
2016 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
/*
 * Module-exit teardown: remove the /proc/net entries registered by
 * ip6_route_init() and destroy the rt6_info slab cache.
 */
2023 void __exit ip6_route_cleanup(void)
2025 #ifdef CONFIG_PROC_FS
2026 proc_net_remove("ipv6_route");
2027 proc_net_remove("rt6_stats");
2034 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);