2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
53 #include <linux/rtnetlink.h>
57 #include <asm/uaccess.h>
60 #include <linux/sysctl.h>
63 /* Set to 3 to get tracing. */
/* Debug helpers.  RDBG prints its printk argument list unconditionally;
 * RT6_TRACE is the routing-trace macro.  Both a printk(KERN_DEBUG) variant
 * and a no-op variant of RT6_TRACE appear here -- presumably selected by a
 * debug-level #if/#else in the elided lines; confirm against the full file. */
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
71 #define RT6_TRACE(x...) do { ; } while (0)
/* Routing-cache garbage-collection and PMTU tunables.  Presumably exported
 * via sysctl (sysctl.h is included above) -- confirm in the full file.
 * ip6_rt_gc_interval is deliberately non-static: referenced elsewhere. */
75 static int ip6_rt_max_size = 4096;		/* hard cap on cached dst entries */
76 static int ip6_rt_gc_min_interval = HZ / 2;	/* minimum time between GC runs */
77 static int ip6_rt_gc_timeout = 60*HZ;
78 int ip6_rt_gc_interval = 30*HZ;
79 static int ip6_rt_gc_elasticity = 9;		/* expire -= expire >> elasticity per pass */
80 static int ip6_rt_mtu_expires = 10*60*HZ;	/* learned-PMTU lifetime (RFC 1981 suggests 10 min) */
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;	/* floor for advertised MSS */
/* Forward declarations for the dst_ops callbacks and packet-drop handlers
 * defined later in this file. */
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void ip6_dst_destroy(struct dst_entry *);
87 static void ip6_dst_ifdown(struct dst_entry *, int how);
88 static int ip6_dst_gc(void);
90 static int ip6_pkt_discard(struct sk_buff *skb);
91 static int ip6_pkt_discard_out(struct sk_buff *skb);
92 static void ip6_link_failure(struct sk_buff *skb);
93 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
/* dst_ops vtable: hooks struct rt6_info entries into the generic dst cache
 * (validation, teardown, device-down migration, PMTU updates).  Some member
 * initializers (e.g. .family, .gc) are in elided lines. */
95 static struct dst_ops ip6_dst_ops = {
97 	.protocol		=	__constant_htons(ETH_P_IPV6),
100 	.check			=	ip6_dst_check,
101 	.destroy		=	ip6_dst_destroy,
102 	.ifdown			=	ip6_dst_ifdown,
103 	.negative_advice	=	ip6_negative_advice,
104 	.link_failure		=	ip6_link_failure,
105 	.update_pmtu		=	ip6_rt_update_pmtu,
106 	.entry_size		=	sizeof(struct rt6_info),
/* Sentinel "no route" entry.  Lookups that find nothing return this; its
 * input/output handlers drop the packet and its error is -ENETUNREACH.
 * The initial refcount of 1 keeps it from ever being freed, and the
 * maximal metric keeps it sorted last. */
109 struct rt6_info ip6_null_entry = {
112 			.__refcnt	= ATOMIC_INIT(1),
114 			.dev		= &loopback_dev,
116 			.error		= -ENETUNREACH,
117 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
118 			.input		= ip6_pkt_discard,
119 			.output		= ip6_pkt_discard_out,
121 			.path		= (struct dst_entry*)&ip6_null_entry,
124 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
125 	.rt6i_metric	= ~(u32) 0,
126 	.rt6i_ref	= ATOMIC_INIT(1),
/* Root of the IPv6 FIB radix tree; its leaf starts out as the null entry
 * so an empty table still resolves every lookup. */
129 struct fib6_node ip6_routing_table = {
130 	.leaf		= &ip6_null_entry,
131 	.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
134 /* Protects all the ip6 fib */
136 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
139 /* allocate dst with ip6_dst_ops */
/* Thin wrapper: carve a zeroed rt6_info out of the generic dst allocator,
 * bound to the IPv6 dst_ops table above. */
140 static __inline__ struct rt6_info *ip6_dst_alloc(void)
142 	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
/* dst_ops.destroy callback: drop this route's reference on its inet6_dev.
 * The idev put presumably follows in the elided lines. */
145 static void ip6_dst_destroy(struct dst_entry *dst)
147 	struct rt6_info *rt = (struct rt6_info *)dst;
148 	struct inet6_dev *idev = rt->rt6i_idev;
151 		rt->rt6i_idev = NULL;
/* dst_ops.ifdown callback: when the route's device goes away, re-home the
 * entry onto the loopback inet6_dev so the dst stays valid until released.
 * Reference hand-off for the old idev is in the elided lines. */
156 static void ip6_dst_ifdown(struct dst_entry *dst, int how)
158 	struct rt6_info *rt = (struct rt6_info *)dst;
159 	struct inet6_dev *idev = rt->rt6i_idev;
161 	if (idev != NULL && idev->dev != &loopback_dev) {
162 		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
163 		if (loopback_idev != NULL) {
164 			rt->rt6i_idev = loopback_idev;
171  *	Route lookup. Any rt6_lock is implied.
/* Walk the sibling list starting at rt and pick the entry matching the
 * requested output interface (oif).  Loopback routes whose idev matches
 * are remembered as a fallback ("local").  If nothing matches and the
 * caller asked for strict matching, the null entry is returned (the
 * strict/non-strict branch is partly in elided lines). */
174 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
178 	struct rt6_info *local = NULL;
179 	struct rt6_info *sprt;
182 		for (sprt = rt; sprt; sprt = sprt->u.next) {
183 			struct net_device *dev = sprt->rt6i_dev;
184 			if (dev->ifindex == oif)
186 			if (dev->flags & IFF_LOOPBACK) {
187 				if (sprt->rt6i_idev == NULL ||
188 				    sprt->rt6i_idev->dev->ifindex != oif) {
191 					    (local && (!oif ||
192 						       local->rt6i_idev->dev->ifindex == oif)))
203 			return &ip6_null_entry;
209  *	pointer to the last default router chosen. BH is disabled locally.
/* Sticky default-router choice (see rt6_best_dflt below) and the spinlock
 * protecting it. */
211 struct rt6_info *rt6_dflt_pointer;
212 spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
/* Forget the sticky default router if it is 'rt' (or unconditionally when
 * rt == NULL), forcing the next lookup to re-select. */
214 void rt6_reset_dflt_pointer(struct rt6_info *rt)
216 	spin_lock_bh(&rt6_dflt_lock);
217 	if (rt == NULL || rt == rt6_dflt_pointer) {
218 		RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
219 		rt6_dflt_pointer = NULL;
221 	spin_unlock_bh(&rt6_dflt_lock);
224 /*	Default Router Selection (RFC 2461 6.3.6) */
/* Pick the best default router from the sibling list at 'rt'.
 * Routers are scored (the scoring switch on neigh->nud_state is largely
 * in elided lines); an expired route is skipped, and the previously
 * chosen router (rt6_dflt_pointer) gets a stickiness bonus so we keep
 * using it while it stays (probably) reachable.  If no router is known
 * reachable, round-robin from the last choice; as a last resort fall
 * back to an addrconf default route or the null entry. */
225 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
227 	struct rt6_info *match = NULL;
228 	struct rt6_info *sprt;
231 	for (sprt = rt; sprt; sprt = sprt->u.next) {
232 		struct neighbour *neigh;
237 		     sprt->rt6i_dev->ifindex == oif))
		/* skip routers whose lifetime has expired */
240 		if ((sprt->rt6i_flags & RTF_EXPIRES) &&
241 		    time_after(jiffies, sprt->rt6i_expires))
244 		if (sprt == rt6_dflt_pointer)
247 		if ((neigh = sprt->rt6i_nexthop) != NULL) {
248 			read_lock_bh(&neigh->lock);
249 			switch (neigh->nud_state) {
267 			read_unlock_bh(&neigh->lock);
270 			read_unlock_bh(&neigh->lock);
		/* m is the score accumulated above (elided); 12 presumably
		 * means "reachable enough, stop looking" -- confirm. */
275 		if (m > mpri || m >= 12) {
279 			/* we choose the last default router if it
280 			 * is in (probably) reachable state.
281 			 * If route changed, we should do pmtu
282 			 * discovery. --yoshfuji
289 	spin_lock(&rt6_dflt_lock);
292 		 * No default routers are known to be reachable.
		/* round-robin: resume scanning after the last choice */
295 		if (rt6_dflt_pointer) {
296 			for (sprt = rt6_dflt_pointer->u.next;
297 			     sprt; sprt = sprt->u.next) {
298 				if (sprt->u.dst.obsolete <= 0 &&
299 				    sprt->u.dst.error == 0) {
306 			     sprt = sprt->u.next) {
307 				if (sprt->u.dst.obsolete <= 0 &&
308 				    sprt->u.dst.error == 0) {
312 				if (sprt == rt6_dflt_pointer)
319 		if (rt6_dflt_pointer != match)
320 			RT6_TRACE("changed default router: %p->%p\n",
321 				  rt6_dflt_pointer, match);
322 		rt6_dflt_pointer = match;
324 	spin_unlock(&rt6_dflt_lock);
328 		 * Last Resort: if no default routers found,
329 		 * use addrconf default route.
330 		 * We don't record this route.
332 		for (sprt = ip6_routing_table.leaf;
333 		     sprt; sprt = sprt->u.next) {
334 			if ((sprt->rt6i_flags & RTF_DEFAULT) &&
337 			      sprt->rt6i_dev->ifindex == oif))) {
343 		/* no default route.  give up. */
344 		match = &ip6_null_entry;
/* Public lookup: find the route for (daddr, saddr) honouring oif/strict,
 * take a reference, and return it.  On error the reference is dropped
 * (the NULL-return path is in elided lines). */
351 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
354 	struct fib6_node *fn;
357 	read_lock_bh(&rt6_lock);
358 	fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
359 	rt = rt6_device_match(fn->leaf, oif, strict);
	/* hold before dropping rt6_lock so the entry cannot vanish */
360 	dst_hold(&rt->u.dst);
362 	read_unlock_bh(&rt6_lock);
364 	rt->u.dst.lastuse = jiffies;
365 	if (rt->u.dst.error == 0)
367 	dst_release(&rt->u.dst);
371 /* ip6_ins_rt is called with FREE rt6_lock.
372    It takes new route entry, the addition fails by any reason the
373    route is freed. In any case, if caller does not hold it, it may
/* Insert a new route into the FIB under the write lock; ownership of rt
 * transfers to the tree (fib6_add frees it on failure). */
377 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
381 	write_lock_bh(&rt6_lock);
382 	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
383 	write_unlock_bh(&rt6_lock);
388 /* No rt6_lock! If COW failed, the function returns dead route entry
389    with dst->error set to errno value.
/* Copy-on-write clone of a connected route for a specific destination:
 * narrow the clone to a /128 host route, mark it RTF_CACHE, attach a
 * neighbour entry, and insert it.  On allocation failure the held null
 * entry is returned instead (error path partly elided). */
392 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
393 				struct in6_addr *saddr)
402 	rt = ip6_rt_copy(ort);
405 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
		/* non-gateway (on-link) routes: nexthop is the destination itself */
407 		if (!(rt->rt6i_flags&RTF_GATEWAY))
408 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
410 		rt->rt6i_dst.plen = 128;
411 		rt->rt6i_flags |= RTF_CACHE;
412 		rt->u.dst.flags |= DST_HOST;
414 #ifdef CONFIG_IPV6_SUBTREES
415 		if (rt->rt6i_src.plen && saddr) {
416 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
417 			rt->rt6i_src.plen = 128;
421 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
423 		dst_hold(&rt->u.dst);
425 		err = ip6_ins_rt(rt, NULL, NULL);
429 		rt->u.dst.error = err;
433 	dst_hold(&ip6_null_entry.u.dst);
434 	return &ip6_null_entry;
/* Shared by ip6_route_input/ip6_route_output: when a strict lookup hit the
 * null entry, walk back up the fib6 tree to the nearest ancestor that
 * carries route info and retry from there (jump targets are in the
 * callers' elided lines). */
437 #define BACKTRACK() \
438 if (rt == &ip6_null_entry && strict) { \
439 	while ((fn = fn->parent) != NULL) { \
440 		if (fn->fn_flags & RTN_ROOT) { \
441 			dst_hold(&rt->u.dst); \
444 		if (fn->fn_flags & RTN_RTINFO) \
/* Resolve the route for an incoming packet and attach it to skb->dst.
 * Cached (RTF_CACHE) entries are used directly; otherwise the route may
 * need a copy-on-write clone (rt6_cow) to gain a nexthop, with a bounded
 * retry loop ('attempts', declared in elided lines) to cope with a
 * concurrent insert racing us while rt6_lock was dropped. */
450 void ip6_route_input(struct sk_buff *skb)
452 	struct fib6_node *fn;
	/* multicast/link-local destinations must match the input interface */
457 	strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
460 	read_lock_bh(&rt6_lock);
462 	fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
463 			 &skb->nh.ipv6h->saddr);
468 	if ((rt->rt6i_flags & RTF_CACHE)) {
469 		rt = rt6_device_match(rt, skb->dev->ifindex, strict);
471 		dst_hold(&rt->u.dst);
475 	rt = rt6_device_match(rt, skb->dev->ifindex, 0);
	/* connected route without a neighbour: clone it for this daddr */
478 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
479 		struct rt6_info *nrt;
480 		dst_hold(&rt->u.dst);
481 		read_unlock_bh(&rt6_lock);
483 		nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
484 			      &skb->nh.ipv6h->saddr);
486 		dst_release(&rt->u.dst);
489 		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
492 		/* Race condition! In the gap, when rt6_lock was
493 		   released someone could insert this route.  Relookup.
495 		dst_release(&rt->u.dst);
498 	dst_hold(&rt->u.dst);
501 	read_unlock_bh(&rt6_lock);
503 	rt->u.dst.lastuse = jiffies;
505 	skb->dst = (struct dst_entry *) rt;
/* Resolve the route for locally-originated traffic described by 'fl'.
 * Mirrors ip6_route_input, plus default-router selection: a default route
 * at addrconf priority goes through rt6_best_dflt.  Same COW + bounded
 * retry scheme for routes lacking a nexthop. */
508 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
510 	struct fib6_node *fn;
515 	strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
518 	read_lock_bh(&rt6_lock);
520 	fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
525 	if ((rt->rt6i_flags & RTF_CACHE)) {
526 		rt = rt6_device_match(rt, fl->oif, strict);
528 		dst_hold(&rt->u.dst);
531 	if (rt->rt6i_flags & RTF_DEFAULT) {
532 		if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
533 			rt = rt6_best_dflt(rt, fl->oif);
535 		rt = rt6_device_match(rt, fl->oif, strict);
539 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
540 		struct rt6_info *nrt;
541 		dst_hold(&rt->u.dst);
542 		read_unlock_bh(&rt6_lock);
544 		nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
546 		dst_release(&rt->u.dst);
549 		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
552 		/* Race condition! In the gap, when rt6_lock was
553 		   released someone could insert this route.  Relookup.
555 		dst_release(&rt->u.dst);
558 	dst_hold(&rt->u.dst);
561 	read_unlock_bh(&rt6_lock);
563 	rt->u.dst.lastuse = jiffies;
570  *	Destination cache support functions
/* dst_ops.check: a cached dst is still valid only while its fib node's
 * serial number matches the cookie captured at lookup time. */
573 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
577 	rt = (struct rt6_info *) dst;
579 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/* dst_ops.negative_advice: a socket reports this route as bad; evict it
 * from the FIB if it is only a cached clone. */
586 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
588 	struct rt6_info *rt = (struct rt6_info *) dst;
591 		if (rt->rt6i_flags & RTF_CACHE)
592 			ip6_del_rt(rt, NULL, NULL);
/* dst_ops.link_failure: neighbour resolution failed for this packet.
 * Tell the sender (ICMPv6 address unreachable), then expire the cached
 * clone immediately, or invalidate the default route's fib node serial so
 * ip6_dst_check forces a re-lookup. */
599 static void ip6_link_failure(struct sk_buff *skb)
603 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
605 	rt = (struct rt6_info *) skb->dst;
607 		if (rt->rt6i_flags&RTF_CACHE) {
608 			dst_set_expires(&rt->u.dst, 0);
609 			rt->rt6i_flags |= RTF_EXPIRES;
610 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
611 			rt->rt6i_node->fn_sernum = -1;
/* dst_ops.update_pmtu: record a smaller path MTU on a host (/128) route;
 * the clamp against IPV6_MIN_MTU is in the elided line after the check. */
615 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
617 	struct rt6_info *rt6 = (struct rt6_info*)dst;
619 	if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
620 		rt6->rt6i_flags |= RTF_MODIFIED;
621 		if (mtu < IPV6_MIN_MTU)
623 		dst->metrics[RTAX_MTU-1] = mtu;
627 /*	Protected by rt6_lock.  */
/* Singly-linked list of ndisc-allocated dsts awaiting GC (see
 * ndisc_dst_alloc / ndisc_dst_gc below). */
628 static struct dst_entry *ndisc_dst_gc_list;
629 static int ipv6_get_mtu(struct net_device *dev);
/* Derive the advertised MSS from a path MTU: subtract IPv6 + TCP header
 * overhead, clamp to the configured minimum, and cap at the maximal
 * non-jumbo payload (the final clamp is in elided lines). */
631 static inline unsigned int ipv6_advmss(unsigned int mtu)
633 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
635 	if (mtu < ip6_rt_min_advmss)
636 		mtu = ip6_rt_min_advmss;
639 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
640 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
641 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
642 	 * rely only on pmtu discovery"
644 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/* Build a standalone dst for neighbour-discovery traffic to 'addr' on
 * 'dev'.  The entry is never inserted into the FIB; instead it is chained
 * on ndisc_dst_gc_list and reaped by ndisc_dst_gc once its refcount
 * drops.  Returns NULL-ish on failure (error paths partly elided). */
649 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
650 				  struct neighbour *neigh,
651 				  struct in6_addr *addr,
652 				  int (*output)(struct sk_buff *))
655 	struct inet6_dev *idev = in6_dev_get(dev);
657 	if (unlikely(idev == NULL))
660 	rt = ip6_dst_alloc();
661 	if (unlikely(rt == NULL)) {
		/* caller may pass a neigh; otherwise resolve one here */
670 		neigh = ndisc_get_neigh(dev, addr);
673 	rt->rt6i_idev     = idev;
674 	rt->rt6i_nexthop  = neigh;
675 	atomic_set(&rt->u.dst.__refcnt, 1);
676 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
677 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
678 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
679 	rt->u.dst.output  = output;
681 #if 0	/* there's no chance to use these for ndisc */
682 	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
685 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
686 	rt->rt6i_dst.plen = 128;
	/* chain onto the ndisc GC list under the fib writer lock */
689 	write_lock_bh(&rt6_lock);
690 	rt->u.dst.next = ndisc_dst_gc_list;
691 	ndisc_dst_gc_list = &rt->u.dst;
692 	write_unlock_bh(&rt6_lock);
694 	fib6_force_start_gc();
697 	return (struct dst_entry *)rt;
/* Reap unreferenced entries from ndisc_dst_gc_list; '*more' presumably
 * reports how many remain (freeing/unlink details are in elided lines). */
700 int ndisc_dst_gc(int *more)
702 	struct dst_entry *dst, *next, **pprev;
706 	pprev = &ndisc_dst_gc_list;
708 	while ((dst = *pprev) != NULL) {
709 		if (!atomic_read(&dst->__refcnt)) {
/* dst_ops garbage collector.  Skips work if run again too soon while the
 * cache is under the size cap; otherwise shrinks the adaptive 'expire'
 * window each pass so pressure evicts entries faster.  Returns nonzero
 * while the cache is still over ip6_rt_max_size. */
722 static int ip6_dst_gc(void)
724 	static unsigned expire = 30*HZ;
725 	static unsigned long last_gc;
726 	unsigned long now = jiffies;
728 	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
729 	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
735 	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
736 		expire = ip6_rt_gc_timeout>>1;
739 	expire -= expire>>ip6_rt_gc_elasticity;
740 	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
743 /* Clean host part of a prefix. Not necessary in radix tree,
744    but results in cleaner routing tables.
746    Remove it only when all the things will work!
/* Effective IPv6 MTU for a device: the per-device cnf.mtu6 when an
 * inet6_dev exists, else the IPv6 minimum (put/return in elided lines). */
749 static int ipv6_get_mtu(struct net_device *dev)
751 	int mtu = IPV6_MIN_MTU;
752 	struct inet6_dev *idev;
754 	idev = in6_dev_get(dev);
756 		mtu = idev->cnf.mtu6;
/* Default hop limit for a device: per-device cnf.hop_limit when an
 * inet6_dev exists, else the global ipv6_devconf default. */
762 static int ipv6_get_hoplimit(struct net_device *dev)
764 	int hoplimit = ipv6_devconf.hop_limit;
765 	struct inet6_dev *idev;
767 	idev = in6_dev_get(dev);
769 		hoplimit = idev->cnf.hop_limit;
/* Create and insert a route described by an in6_rtmsg (and optional
 * netlink attributes).  Validates prefix lengths, resolves the output
 * device, promotes loopback routes to reject routes, validates gateway
 * nexthops per RFC 2461, applies RTA_METRICS overrides, and fills default
 * hoplimit/MTU/advmss metrics before handing the entry to ip6_ins_rt.
 * On any failure path the half-built rt is dst_free'd.  Several error
 * checks and 'goto out' branches sit in elided lines. */
779 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
784 	struct rt6_info *rt = NULL;
785 	struct net_device *dev = NULL;
786 	struct inet6_dev *idev = NULL;
789 	rta = (struct rtattr **) _rtattr;
791 	if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
793 #ifndef CONFIG_IPV6_SUBTREES
	/* source-based routing needs subtree support */
794 	if (rtmsg->rtmsg_src_len)
797 	if (rtmsg->rtmsg_ifindex) {
799 		dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
802 		idev = in6_dev_get(dev);
807 	if (rtmsg->rtmsg_metric == 0)
808 		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
810 	rt = ip6_dst_alloc();
817 	rt->u.dst.obsolete = -1;
818 	rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
819 	if (nlh && (r = NLMSG_DATA(nlh))) {
820 		rt->rt6i_protocol = r->rtm_protocol;
822 		rt->rt6i_protocol = RTPROT_BOOT;
825 	addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
827 	if (addr_type & IPV6_ADDR_MULTICAST)
828 		rt->u.dst.input = ip6_mc_input;
830 		rt->u.dst.input = ip6_forward;
832 	rt->u.dst.output = ip6_output;
834 	ipv6_addr_prefix(&rt->rt6i_dst.addr,
835 			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
836 	rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
837 	if (rt->rt6i_dst.plen == 128)
838 		rt->u.dst.flags = DST_HOST;
840 #ifdef CONFIG_IPV6_SUBTREES
841 	ipv6_addr_prefix(&rt->rt6i_src.addr,
842 			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
843 	rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
846 	rt->rt6i_metric = rtmsg->rtmsg_metric;
848 	/* We cannot add true routes via loopback here,
849 	   they would result in kernel looping; promote them to reject routes
851 	if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
852 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
853 		/* hold loopback dev/idev if we haven't done so. */
854 		if (dev != &loopback_dev) {
861 			idev = in6_dev_get(dev);
867 		rt->u.dst.output = ip6_pkt_discard_out;
868 		rt->u.dst.input = ip6_pkt_discard;
869 		rt->u.dst.error = -ENETUNREACH;
870 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
874 	if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
875 		struct in6_addr *gw_addr;
878 		gw_addr = &rtmsg->rtmsg_gateway;
879 		ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
880 		gwa_type = ipv6_addr_type(gw_addr);
882 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
883 			struct rt6_info *grt;
885 			/* IPv6 strictly inhibits using not link-local
886 			   addresses as nexthop address.
887 			   Otherwise, router will not able to send redirects.
888 			   It is very good, but in some (rare!) circumstances
889 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
890 			   some exceptions. --ANK
893 			if (!(gwa_type&IPV6_ADDR_UNICAST))
			/* recursive lookup: the non-link-local gateway must
			 * itself be reachable by a direct (non-gateway) route */
896 			grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
902 				if (dev != grt->rt6i_dev) {
903 					dst_release(&grt->u.dst);
908 				idev = grt->rt6i_idev;
910 				in6_dev_hold(grt->rt6i_idev);
912 			if (!(grt->rt6i_flags&RTF_GATEWAY))
914 			dst_release(&grt->u.dst);
920 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
928 	if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
929 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
930 		if (IS_ERR(rt->rt6i_nexthop)) {
931 			err = PTR_ERR(rt->rt6i_nexthop);
932 			rt->rt6i_nexthop = NULL;
937 	rt->rt6i_flags = rtmsg->rtmsg_flags;
	/* apply caller-supplied per-route metrics (RTA_METRICS nest) */
940 	if (rta && rta[RTA_METRICS-1]) {
941 		int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
942 		struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
944 		while (RTA_OK(attr, attrlen)) {
945 			unsigned flavor = attr->rta_type;
947 				if (flavor > RTAX_MAX) {
951 				rt->u.dst.metrics[flavor-1] =
952 					*(u32 *)RTA_DATA(attr);
954 			attr = RTA_NEXT(attr, attrlen);
	/* fill in defaults for any metrics not explicitly set */
958 	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
959 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
960 			rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
961 				IPV6_DEFAULT_MCASTHOPS;
963 			rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
964 				ipv6_get_hoplimit(dev);
967 	if (!rt->u.dst.metrics[RTAX_MTU-1])
968 		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
969 	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
970 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
972 	rt->rt6i_idev = idev;
973 	return ip6_ins_rt(rt, nlh, _rtattr);
981 		dst_free((struct dst_entry *) rt);
/* Remove a route from the FIB under the write lock.  The sticky default-
 * router pointer is cleared unconditionally (NULL arg) since the deleted
 * route might be it, and the caller's reference is dropped here. */
985 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
989 	write_lock_bh(&rt6_lock);
991 	rt6_reset_dflt_pointer(NULL);
993 	err = fib6_del(rt, nlh, _rtattr);
994 	dst_release(&rt->u.dst);
996 	write_unlock_bh(&rt6_lock);
/* Delete the route matching an in6_rtmsg: locate the exact fib node for
 * the (dst, src) prefixes, then scan its leaf chain for an entry matching
 * the optional ifindex / gateway / metric filters.  A reference is taken
 * before dropping the read lock, since ip6_del_rt consumes it. */
1001 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
1003 	struct fib6_node *fn;
1004 	struct rt6_info *rt;
1007 	read_lock_bh(&rt6_lock);
1009 	fn = fib6_locate(&ip6_routing_table,
1010 			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1011 			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1014 		for (rt = fn->leaf; rt; rt = rt->u.next) {
1015 			if (rtmsg->rtmsg_ifindex &&
1016 			    (rt->rt6i_dev == NULL ||
1017 			     rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1019 			if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1020 			    !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1022 			if (rtmsg->rtmsg_metric &&
1023 			    rtmsg->rtmsg_metric != rt->rt6i_metric)
1025 			dst_hold(&rt->u.dst);
1026 			read_unlock_bh(&rt6_lock);
1028 			return ip6_del_rt(rt, nlh, _rtattr);
1031 	read_unlock_bh(&rt6_lock);
/* Handle an ICMPv6 Redirect for 'dest' received from 'saddr'/'neigh'.
 * Validates the redirect (came in on the route's device, old route was a
 * gateway route, source is a plausible nexthop per RFC 2461), updates the
 * neighbour cache, then installs a cloned RTF_CACHE host route via the new
 * nexthop and removes the superseded cached entry.  Several bail-out
 * branches ('goto out' etc.) are in elided lines. */
1039 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1040 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1042 	struct rt6_info *rt, *nrt;
1044 	/* Locate old route to this destination. */
1045 	rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1050 	if (neigh->dev != rt->rt6i_dev)
1054 	 *	Current route is on-link; redirect is always invalid.
1056 	 *	Seems, previous statement is not true. It could
1057 	 *	be node, which looks for us as on-link (f.e. proxy ndisc)
1058 	 *	But then router serving it might decide, that we should
1059 	 *	know truth 8)8) --ANK (980726).
1061 	if (!(rt->rt6i_flags&RTF_GATEWAY))
1065 	 *	RFC 2461 specifies that redirects should only be
1066 	 *	accepted if they come from the nexthop to the target.
1067 	 *	Due to the way default routers are chosen, this notion
1068 	 *	is a bit fuzzy and one might need to check all default
1071 	if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1072 		if (rt->rt6i_flags & RTF_DEFAULT) {
1073 			struct rt6_info *rt1;
			/* source may still be one of our default routers */
1075 			read_lock(&rt6_lock);
1076 			for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1077 				if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1078 					dst_hold(&rt1->u.dst);
1079 					dst_release(&rt->u.dst);
1080 					read_unlock(&rt6_lock);
1085 			read_unlock(&rt6_lock);
1087 		if (net_ratelimit())
1088 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1089 			       "for redirect target\n");
1096 	 *	We have finally decided to accept it.
1099 	neigh_update(neigh, lladdr, NUD_STALE,
1100 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1101 		     NEIGH_UPDATE_F_OVERRIDE|
1102 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1103 				     NEIGH_UPDATE_F_ISROUTER))
1107 	 * Redirect received -> path was valid.
1108 	 * Look, redirects are sent only in response to data packets,
1109 	 * so that this nexthop apparently is reachable. --ANK
1111 	dst_confirm(&rt->u.dst);
1113 	/* Duplicate redirect: silently ignore. */
1114 	if (neigh == rt->u.dst.neighbour)
1117 	nrt = ip6_rt_copy(rt);
1121 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	/* on-link redirect: target is directly reachable, no gateway bit */
1123 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1125 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1126 	nrt->rt6i_dst.plen = 128;
1127 	nrt->u.dst.flags |= DST_HOST;
1129 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1130 	nrt->rt6i_nexthop = neigh_clone(neigh);
1131 	/* Reset pmtu, it may be better */
1132 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1133 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
1135 	if (ip6_ins_rt(nrt, NULL, NULL))
1138 	if (rt->rt6i_flags&RTF_CACHE) {
1139 		ip6_del_rt(rt, NULL, NULL);
1144 	dst_release(&rt->u.dst);
1149  *	Handle ICMP "packet too big" messages
1150  *	i.e. Path MTU discovery
/* Record a smaller path MTU learned from an ICMPv6 Packet Too Big.
 * Per RFC 1981 the reported MTU is clamped up to IPV6_MIN_MTU.  A cached
 * host route is updated in place; otherwise a per-destination clone is
 * created (via rt6_cow for connected routes, or a direct copy for
 * gatewayed/NONEXTHOP ones) and given a 10-minute expiry so PMTU
 * increases can be rediscovered. */
1153 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1154 			struct net_device *dev, u32 pmtu)
1156 	struct rt6_info *rt, *nrt;
1158 	if (pmtu < IPV6_MIN_MTU) {
1159 		if (net_ratelimit())
1160 			printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1162 		/* According to RFC1981, the PMTU is set to the IPv6 minimum
1163 		   link MTU if the node receives a Packet Too Big message
1164 		   reporting next-hop MTU that is less than the IPv6 minimum MTU.
1166 		pmtu = IPV6_MIN_MTU;
1169 	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
	/* only ever shrink the recorded PMTU */
1174 	if (pmtu >= dst_pmtu(&rt->u.dst))
1177 	/* New mtu received -> path was valid.
1178 	   They are sent only in response to data packets,
1179 	   so that this nexthop apparently is reachable. --ANK
1181 	dst_confirm(&rt->u.dst);
1183 	/* Host route. If it is static, it would be better
1184 	   not to override it, but add new one, so that
1185 	   when cache entry will expire old pmtu
1186 	   would return automatically.
1188 	if (rt->rt6i_flags & RTF_CACHE) {
1189 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1190 		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1191 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1196 	   Two cases are possible:
1197 	   1. It is connected route. Action: COW
1198 	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1200 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1201 		nrt = rt6_cow(rt, daddr, saddr);
1202 		if (!nrt->u.dst.error) {
1203 			nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1204 			/* According to RFC 1981, detecting PMTU increase shouldn't be
1205 			   happened within 5 mins, the recommended timer is 10 mins.
1206 			   Here this route expiration time is set to ip6_rt_mtu_expires
1207 			   which is 10 mins. After 10 mins the decreased pmtu is expired
1208 			   and detecting PMTU increase will be automatically happened.
1210 			dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1211 			nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1213 		dst_release(&nrt->u.dst);
1215 		nrt = ip6_rt_copy(rt);
1218 		ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1219 		nrt->rt6i_dst.plen = 128;
1220 		nrt->u.dst.flags |= DST_HOST;
1221 		nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1222 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1223 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1224 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1225 		ip6_ins_rt(nrt, NULL, NULL);
1229 	dst_release(&rt->u.dst);
1233  *	Misc support functions
/* Shallow-copy an existing route into a fresh rt6_info: handlers, metrics,
 * device/idev references (with holds), gateway, dst/src keys.  The clone
 * drops RTF_EXPIRES, and starts with metric 0 and no expiry; callers then
 * specialise it (rt6_cow, rt6_redirect, rt6_pmtu_discovery). */
1236 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1238 	struct rt6_info *rt = ip6_dst_alloc();
1241 		rt->u.dst.input = ort->u.dst.input;
1242 		rt->u.dst.output = ort->u.dst.output;
1244 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1245 		rt->u.dst.dev = ort->u.dst.dev;
1247 			dev_hold(rt->u.dst.dev);
1248 		rt->rt6i_idev = ort->rt6i_idev;
1250 			in6_dev_hold(rt->rt6i_idev);
1251 		rt->u.dst.lastuse = jiffies;
1252 		rt->rt6i_expires = 0;
1254 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1255 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1256 		rt->rt6i_metric = 0;
1258 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1259 #ifdef CONFIG_IPV6_SUBTREES
1260 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
/* Find an existing default-router route through gateway 'addr' on 'dev'
 * by scanning the root fib node's leaf chain; returns it held, or falls
 * through with rt == NULL.  NOTE(review): uses the write lock although
 * this looks like a read-side scan -- presumably intentional; confirm. */
1266 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1268 	struct rt6_info *rt;
1269 	struct fib6_node *fn;
1271 	fn = &ip6_routing_table;
1273 	write_lock_bh(&rt6_lock);
1274 	for (rt = fn->leaf; rt; rt=rt->u.next) {
1275 		if (dev == rt->rt6i_dev &&
1276 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1280 		dst_hold(&rt->u.dst);
1281 	write_unlock_bh(&rt6_lock);
/* Install a default route via 'gwaddr' on 'dev' (used when a Router
 * Advertisement announces a default router), then look it up again to
 * return the held FIB entry. */
1285 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1286 				     struct net_device *dev)
1288 	struct in6_rtmsg rtmsg;
1290 	memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1291 	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1292 	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1293 	rtmsg.rtmsg_metric = 1024;
1294 	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1296 	rtmsg.rtmsg_ifindex = dev->ifindex;
1298 	ip6_route_add(&rtmsg, NULL, NULL);
1299 	return rt6_get_dflt_router(gwaddr, dev);
/* Remove every autoconfigured default route.  Each deletion must drop the
 * read lock (ip6_del_rt takes the write lock), so the scan restarts from
 * the head after each removal (restart goto presumably in elided lines). */
1302 void rt6_purge_dflt_routers(void)
1304 	struct rt6_info *rt;
1307 	read_lock_bh(&rt6_lock);
1308 	for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1309 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1310 			dst_hold(&rt->u.dst);
1312 			rt6_reset_dflt_pointer(NULL);
1314 			read_unlock_bh(&rt6_lock);
1316 			ip6_del_rt(rt, NULL, NULL);
1321 	read_unlock_bh(&rt6_lock);
/* SIOCADDRT/SIOCDELRT ioctl entry point: CAP_NET_ADMIN only, copies the
 * in6_rtmsg from userspace and dispatches to add/del (capability-failure
 * and copy-failure returns are in elided lines). */
1324 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1326 	struct in6_rtmsg rtmsg;
1330 	case SIOCADDRT:		/* Add a route */
1331 	case SIOCDELRT:		/* Delete a route */
1332 		if (!capable(CAP_NET_ADMIN))
1334 		err = copy_from_user(&rtmsg, arg,
1335 				     sizeof(struct in6_rtmsg));
1342 			err = ip6_route_add(&rtmsg, NULL, NULL);
1345 			err = ip6_route_del(&rtmsg, NULL, NULL);
1359  *	Drop the packet on the floor
/* Input handler for reject/null routes: count it as "no route", tell the
 * sender via ICMPv6, and drop. */
1362 int ip6_pkt_discard(struct sk_buff *skb)
1364 	IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1365 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
/* Output-side twin of ip6_pkt_discard: fix up skb->dev from the dst
 * before delegating (icmpv6_send needs a device to reply on). */
1370 int ip6_pkt_discard_out(struct sk_buff *skb)
1372 	skb->dev = skb->dst->dev;
1373 	return ip6_pkt_discard(skb);
1377  *	Allocate a dst for local (unicast / anycast) address.
/* Build a host route delivering a local address via the loopback device.
 * The entry is returned held (refcnt 1) but not inserted into the FIB by
 * this function.  An anycast branch setting RTF_LOCAL vs RTF_ANYCAST is
 * presumably in the elided lines around 1403-1404 -- confirm. */
1380 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1381 				    const struct in6_addr *addr,
1384 	struct rt6_info *rt = ip6_dst_alloc();
1387 		return ERR_PTR(-ENOMEM);
1389 	dev_hold(&loopback_dev);
1392 	rt->u.dst.flags = DST_HOST;
1393 	rt->u.dst.input = ip6_input;
1394 	rt->u.dst.output = ip6_output;
1395 	rt->rt6i_dev = &loopback_dev;
1396 	rt->rt6i_idev = idev;
1397 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1398 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
1399 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
1400 	rt->u.dst.obsolete = -1;
1402 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1404 		rt->rt6i_flags |= RTF_LOCAL;
1405 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1406 	if (rt->rt6i_nexthop == NULL) {
1407 		dst_free((struct dst_entry *) rt);
1408 		return ERR_PTR(-ENOMEM);
1411 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1412 	rt->rt6i_dst.plen = 128;
1414 	atomic_set(&rt->u.dst.__refcnt, 1);
/* fib6_clean_tree callback: select for deletion every route on the given
 * device (arg == NULL means "any device"), except the null entry. */
1419 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1421 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1422 	    rt != &ip6_null_entry) {
1423 		RT6_TRACE("deleted by ifdown %p\n", rt);
/* Device-down hook: purge all routes through 'dev' from the FIB under the
 * write lock, using the fib6_ifdown predicate above. */
1429 void rt6_ifdown(struct net_device *dev)
1431 	write_lock_bh(&rt6_lock);
1432 	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1433 	write_unlock_bh(&rt6_lock);
/* Callback argument for rt6_mtu_change_route: the device whose MTU
 * changed (new mtu member declared in an elided line). */
1436 struct rt6_mtu_change_arg
1438 	struct net_device *dev;
/* fib6_clean_tree callback applying a device MTU change to each route on
 * that device: update RTAX_MTU (unless locked) when the new MTU is lower,
 * or when it is higher and the old route PMTU equalled the old link MTU
 * (i.e. this link was the path bottleneck), then refresh advmss. */
1442 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1444 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1445 	struct inet6_dev *idev;
1447 	/* In IPv6 pmtu discovery is not optional,
1448 	   so that RTAX_MTU lock cannot disable it.
1449 	   We still use this lock to block changes
1450 	   caused by addrconf/ndisc.
1453 	idev = __in6_dev_get(arg->dev);
1457 	/* For administrative MTU increase, there is no way to discover
1458 	   IPv6 PMTU increase, so PMTU increase should be updated here.
1459 	   Since RFC 1981 doesn't include administrative MTU increase
1460 	   update PMTU increase is a MUST. (i.e. jumbo frame)
1463 	   If new MTU is less than route PMTU, this new MTU will be the
1464 	   lowest MTU in the path, update the route PMTU to reflect PMTU
1465 	   decreases; if new MTU is greater than route PMTU, and the
1466 	   old MTU is the lowest MTU in the path, update the route PMTU
1467 	   to reflect the increase. In this case if the other nodes' MTU
1468 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
1471 	if (rt->rt6i_dev == arg->dev &&
1472 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1473             (dst_pmtu(&rt->u.dst) > arg->mtu ||
1474              (dst_pmtu(&rt->u.dst) < arg->mtu &&
1475 	      dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
1476 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1477 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
/* Propagate a device MTU change to every affected route in the FIB
 * (arg.dev/arg.mtu assignments are in the elided lines before the walk). */
1481 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1483 	struct rt6_mtu_change_arg arg;
1487 	read_lock_bh(&rt6_lock);
1488 	fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1489 	read_unlock_bh(&rt6_lock);
/* Translate a netlink rtmsg + attribute vector into the legacy in6_rtmsg
 * used by ip6_route_add/del.  Each attribute is length-checked before
 * being copied; RTN_UNREACHABLE maps to RTF_REJECT.  Returns nonzero
 * (in elided lines) on a malformed attribute. */
1492 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1493 			      struct in6_rtmsg *rtmsg)
1495 	memset(rtmsg, 0, sizeof(*rtmsg));
1497 	rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1498 	rtmsg->rtmsg_src_len = r->rtm_src_len;
1499 	rtmsg->rtmsg_flags = RTF_UP;
1500 	if (r->rtm_type == RTN_UNREACHABLE)
1501 		rtmsg->rtmsg_flags |= RTF_REJECT;
1503 	if (rta[RTA_GATEWAY-1]) {
1504 		if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1506 		memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1507 		rtmsg->rtmsg_flags |= RTF_GATEWAY;
1509 	if (rta[RTA_DST-1]) {
1510 		if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1512 		memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1514 	if (rta[RTA_SRC-1]) {
1515 		if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1517 		memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1519 	if (rta[RTA_OIF-1]) {
1520 		if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1522 		memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1524 	if (rta[RTA_PRIORITY-1]) {
1525 		if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1527 		memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
/* RTM_DELROUTE netlink handler: convert the message and delete the route. */
1532 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1534 	struct rtmsg *r = NLMSG_DATA(nlh);
1535 	struct in6_rtmsg rtmsg;
1537 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1539 	return ip6_route_del(&rtmsg, nlh, arg);
/* RTM_NEWROUTE netlink handler: convert the message and add the route. */
1542 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1544 	struct rtmsg *r = NLMSG_DATA(nlh);
1545 	struct in6_rtmsg rtmsg;
1547 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1549 	return ip6_route_add(&rtmsg, nlh, arg);
/*
 * State shared between inet6_dump_fib() and the per-route dump callback:
 * the skb being filled and the netlink dump callback it belongs to.
 */
1552 struct rt6_rtnl_dump_arg
1554 struct sk_buff *skb;
1555 struct netlink_callback *cb;
/*
 * rt6_fill_node() - serialise one rt6_info into an RTM_* netlink message
 * appended to 'skb': an rtmsg header followed by RTA_DST/RTA_SRC/RTA_IIF/
 * RTA_PREFSRC/metrics/RTA_GATEWAY/RTA_OIF/RTA_PRIORITY/RTA_CACHEINFO
 * attributes.  NLMSG_PUT/RTA_PUT jump to failure labels when the skb runs
 * out of room, where the partial message is trimmed back to 'b'.
 * NOTE(review): incomplete excerpt -- the 'iif' parameter (used at the
 * RTA_IIF put below), several guarding if/else lines, the failure labels
 * and closing braces are elided from this listing.
 */
1558 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1559 struct in6_addr *dst,
1560 struct in6_addr *src,
1562 int type, u32 pid, u32 seq,
1563 struct nlmsghdr *in_nlh, int prefix)
1566 struct nlmsghdr *nlh;
/* Remember the start of the message so a failed fill can be trimmed away. */
1567 unsigned char *b = skb->tail;
1568 struct rta_cacheinfo ci;
1570 if (prefix) { /* user wants prefix routes only */
1571 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1572 /* success since this is not a prefix route */
/* Fall back to the requesting message's pid when no explicit pid given. */
1577 if (!pid && in_nlh) {
1578 pid = in_nlh->nlmsg_pid;
1581 nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1582 rtm = NLMSG_DATA(nlh);
1583 rtm->rtm_family = AF_INET6;
1584 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1585 rtm->rtm_src_len = rt->rt6i_src.plen;
1587 rtm->rtm_table = RT_TABLE_MAIN;
/* Route type: reject routes -> UNREACHABLE, loopback device -> LOCAL. */
1588 if (rt->rt6i_flags&RTF_REJECT)
1589 rtm->rtm_type = RTN_UNREACHABLE;
1590 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1591 rtm->rtm_type = RTN_LOCAL;
1593 rtm->rtm_type = RTN_UNICAST;
1595 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
/* Protocol: redirect/addrconf/RA flags override the stored protocol. */
1596 rtm->rtm_protocol = rt->rt6i_protocol;
1597 if (rt->rt6i_flags&RTF_DYNAMIC)
1598 rtm->rtm_protocol = RTPROT_REDIRECT;
1599 else if (rt->rt6i_flags & RTF_ADDRCONF)
1600 rtm->rtm_protocol = RTPROT_KERNEL;
1601 else if (rt->rt6i_flags&RTF_DEFAULT)
1602 rtm->rtm_protocol = RTPROT_RA;
1604 if (rt->rt6i_flags&RTF_CACHE)
1605 rtm->rtm_flags |= RTM_F_CLONED;
/* An explicit 'dst' (route lookup reply) is reported as a /128 host route;
 * NOTE(review): the "if (dst)" guard line is elided from this excerpt. */
1608 RTA_PUT(skb, RTA_DST, 16, dst);
1609 rtm->rtm_dst_len = 128;
1610 } else if (rtm->rtm_dst_len)
1611 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1612 #ifdef CONFIG_IPV6_SUBTREES
1614 RTA_PUT(skb, RTA_SRC, 16, src);
1615 rtm->rtm_src_len = 128;
1616 } else if (rtm->rtm_src_len)
1617 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1620 RTA_PUT(skb, RTA_IIF, 4, &iif);
1622 struct in6_addr saddr_buf;
1623 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1624 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1626 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1627 goto rtattr_failure;
1628 if (rt->u.dst.neighbour)
1629 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1631 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1632 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
/* Cache bookkeeping reported to userspace in clock_t units.
 * NOTE(review): the else-branch zeroing rta_expires is an elided line. */
1633 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1634 if (rt->rt6i_expires)
1635 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1638 ci.rta_used = rt->u.dst.__use;
1639 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1640 ci.rta_error = rt->u.dst.error;
1644 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
/* Finalise the message length now that all attributes are in. */
1645 nlh->nlmsg_len = skb->tail - b;
/* Failure path: drop the partial message from the skb. */
1650 skb_trim(skb, b - skb->data);
/*
 * rt6_dump_route() - per-route callback for the fib6 dump walker: honours
 * the RTM_F_PREFIX flag from the original request (dump prefix routes
 * only) and delegates serialisation to rt6_fill_node().
 * NOTE(review): incomplete excerpt -- the 'prefix' variable declaration
 * and the trailing arguments of the rt6_fill_node() call are elided.
 */
1654 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1656 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
/* Only trust rtm_flags if the request actually carried a full rtmsg. */
1659 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1660 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1661 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1665 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1666 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
/*
 * fib6_dump_node() - walker callback: dump every route hanging off the
 * current leaf; a non-zero result from rt6_dump_route() means the frame
 * filled up and the walk must be suspended.
 * NOTE(review): incomplete excerpt -- the 'res' declaration and the
 * suspend/return bookkeeping lines are elided.
 */
1670 static int fib6_dump_node(struct fib6_walker_t *w)
1673 struct rt6_info *rt;
1675 for (rt = w->leaf; rt; rt = rt->u.next) {
1676 res = rt6_dump_route(rt, w->args);
1678 /* Frame is full, suspend walking */
/*
 * fib6_dump_end() - tear down dump state stashed in the netlink callback:
 * unlink the walker saved in cb->args[0] and restore the original ->done
 * handler saved in cb->args[1].
 * NOTE(review): incomplete excerpt -- NULL checks and the kfree of the
 * walker are elided lines.
 */
1688 static void fib6_dump_end(struct netlink_callback *cb)
1690 struct fib6_walker_t *w = (void*)cb->args[0];
1694 fib6_walker_unlink(w);
1698 cb->done = (void*)cb->args[1];
/*
 * fib6_dump_done() - ->done hook installed by inet6_dump_fib(); chains to
 * the caller's original done callback (restored by fib6_dump_end()).
 * NOTE(review): presumably calls fib6_dump_end(cb) first -- that line is
 * elided from this excerpt; verify against the full source.
 */
1703 static int fib6_dump_done(struct netlink_callback *cb)
1706 return cb->done(cb);
/*
 * inet6_dump_fib() - RTM_GETROUTE dump entry point.  First invocation
 * hooks fib6_dump_done as the callback destructor, allocates and
 * initialises a fib6 walker rooted at ip6_routing_table, and starts the
 * walk; subsequent invocations resume via fib6_walk_continue().  All tree
 * walking happens under the rt6_lock read lock.
 * NOTE(review): incomplete excerpt -- the first/subsequent-call branch,
 * kmalloc failure handling, the initial fib6_walk() call and the walker
 * destruction at dump end are elided lines.
 */
1709 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1711 struct rt6_rtnl_dump_arg arg;
1712 struct fib6_walker_t *w;
/* Walker pointer survives across dump calls in cb->args[0]. */
1718 w = (void*)cb->args[0];
1722 * 1. hook callback destructor.
1724 cb->args[1] = (long)cb->done;
1725 cb->done = fib6_dump_done;
1728 * 2. allocate and initialize walker.
1730 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1733 RT6_TRACE("dump<%p", w);
1734 memset(w, 0, sizeof(*w));
1735 w->root = &ip6_routing_table;
1736 w->func = fib6_dump_node;
1738 cb->args[0] = (long)w;
1739 read_lock_bh(&rt6_lock);
1741 read_unlock_bh(&rt6_lock);
/* Resume path for later invocations of the dump. */
1744 read_lock_bh(&rt6_lock);
1745 res = fib6_walk_continue(w);
1746 read_unlock_bh(&rt6_lock);
1749 if (res <= 0 && skb->len == 0)
1750 RT6_TRACE("%p>dump end\n", w);
1752 res = res < 0 ? res : skb->len;
1753 /* res < 0 is an error. (really, impossible)
1754 res == 0 means that dump is complete, but skb still can contain data.
1755 res > 0 dump is not complete, but frame is full.
1757 /* Destroy walker, if dump of this table is complete. */
/*
 * inet6_rtm_getroute() - RTM_GETROUTE (single route query) handler: build
 * a flow from the RTA_SRC/RTA_DST/RTA_IIF/RTA_OIF attributes, resolve it
 * with ip6_route_output(), serialise the result with rt6_fill_node() and
 * unicast the answer back to the requester.
 * NOTE(review): incomplete excerpt -- declarations of 'fl'/'iif'/'err',
 * attribute-presence guards, alloc_skb failure handling, the input-device
 * lookup error path and the final return are elided lines.
 */
1763 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1765 struct rtattr **rta = arg;
1768 struct sk_buff *skb;
1770 struct rt6_info *rt;
1772 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1776 /* Reserve room for dummy headers, this skb can pass
1777 through good chunk of routing engine.
1779 skb->mac.raw = skb->data;
1780 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1782 memset(&fl, 0, sizeof(fl));
1784 ipv6_addr_copy(&fl.fl6_src,
1785 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1787 ipv6_addr_copy(&fl.fl6_dst,
1788 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
/* Input-interface queries must name an existing device. */
1791 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1794 struct net_device *dev;
1795 dev = __dev_get_by_index(iif);
1804 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1806 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
/* Attach the route so rt6_fill_node/ipv6_get_saddr can use skb->dst. */
1808 skb->dst = &rt->u.dst;
1810 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1811 err = rt6_fill_node(skb, rt,
1812 &fl.fl6_dst, &fl.fl6_src,
1814 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1815 nlh->nlmsg_seq, nlh, 0);
1821 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
/*
 * inet6_rt_notify() - broadcast an RTM_NEWROUTE/RTM_DELROUTE event for
 * 'rt' to the RTMGRP_IPV6_ROUTE multicast group.  Allocation or fill
 * failures are reported to listeners via netlink_set_err() (ENOBUFS /
 * EINVAL) instead of being silently dropped.
 * NOTE(review): incomplete excerpt -- the alloc failure 'return', the
 * kfree_skb on fill failure and closing braces are elided lines.
 */
1831 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1833 struct sk_buff *skb;
/* Headroom for the rtmsg plus a generous 256 bytes of attributes. */
1834 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1836 skb = alloc_skb(size, gfp_any());
1838 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1841 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1843 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1846 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1847 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1854 #ifdef CONFIG_PROC_FS
1856 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/*
 * rt6_info_route() - /proc/net/ipv6_route formatter for one route: a
 * fixed-width (RT6_INFO_LEN) line of hex dst/src prefixes, next-hop,
 * metric, refcount, use count, flags and device name.  Skips entries
 * before the requested file offset and stops once 'length' is exhausted.
 * NOTE(review): incomplete excerpt -- 'i' declaration, arg->len updates
 * after the hex loops, the #else/#endif of the SUBTREES block and early
 * returns are elided lines.
 */
1867 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1869 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
/* Skip whole fixed-size records that lie before the read offset. */
1872 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1877 if (arg->len >= arg->length)
1880 for (i=0; i<16; i++) {
1881 sprintf(arg->buffer + arg->len, "%02x",
1882 rt->rt6i_dst.addr.s6_addr[i]);
1885 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1888 #ifdef CONFIG_IPV6_SUBTREES
1889 for (i=0; i<16; i++) {
1890 sprintf(arg->buffer + arg->len, "%02x",
1891 rt->rt6i_src.addr.s6_addr[i]);
1894 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
/* Without subtrees the source column is a fixed all-zero placeholder. */
1897 sprintf(arg->buffer + arg->len,
1898 "00000000000000000000000000000000 00 ");
1902 if (rt->rt6i_nexthop) {
1903 for (i=0; i<16; i++) {
1904 sprintf(arg->buffer + arg->len, "%02x",
1905 rt->rt6i_nexthop->primary_key[i]);
1909 sprintf(arg->buffer + arg->len,
1910 "00000000000000000000000000000000");
1913 arg->len += sprintf(arg->buffer + arg->len,
1914 " %08x %08x %08x %08x %8s\n",
1915 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1916 rt->u.dst.__use, rt->rt6i_flags,
1917 rt->rt6i_dev ? rt->rt6i_dev->name : "");
/*
 * rt6_proc_info() - legacy procfs read callback for /proc/net/ipv6_route:
 * walks the routing table under the read lock, letting rt6_info_route()
 * fill 'buffer', then fixes up *start/len for the partial-record offset
 * within the first RT6_INFO_LEN-sized line.
 * NOTE(review): incomplete excerpt -- arg.skip/arg.len initialisation,
 * the *start assignment preceding the '+=', and the final return are
 * elided lines.
 */
1921 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1923 struct rt6_proc_arg arg;
1924 arg.buffer = buffer;
1925 arg.offset = offset;
1926 arg.length = length;
1930 read_lock_bh(&rt6_lock);
1931 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1932 read_unlock_bh(&rt6_lock);
/* Account for a read offset landing mid-record. */
1936 *start += offset % RT6_INFO_LEN;
1938 arg.len -= offset % RT6_INFO_LEN;
1940 if (arg.len > length)
/* Global fib6 counters, defined in the fib6 core. */
1948 extern struct rt6_statistics rt6_stats;
/*
 * rt6_stats_seq_show() - single-line /proc/net/rt6_stats output: fib node
 * and route counters plus the live dst-entry count from ip6_dst_ops.
 */
1950 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1952 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1953 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1954 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1955 rt6_stats.fib_rt_cache,
1956 atomic_read(&ip6_dst_ops.entries),
1957 rt6_stats.fib_discarded_routes);
/* seq_file open hook: single-shot show function, no iterator state. */
1962 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1964 return single_open(file, rt6_stats_seq_show, NULL);
/*
 * File operations for /proc/net/rt6_stats, wired up in ip6_route_init().
 * NOTE(review): the .read = seq_read line appears to be elided from this
 * excerpt -- confirm against the full source.
 */
1967 static struct file_operations rt6_stats_seq_fops = {
1968 .owner = THIS_MODULE,
1969 .open = rt6_stats_seq_open,
1971 .llseek = seq_lseek,
1972 .release = single_release,
1974 #endif /* CONFIG_PROC_FS */
1976 #ifdef CONFIG_SYSCTL
/* Scratch integer backing the net.ipv6.route.flush sysctl. */
1978 static int flush_delay;
/*
 * ipv6_sysctl_rtcache_flush() - sysctl handler for "flush": reads the
 * written value via proc_dointvec and triggers fib6 garbage collection
 * with it (negative values are rejected / handled by the elided branch).
 * NOTE(review): incomplete excerpt -- the write-only guard, the body of
 * the negative-value branch and the return statements are elided lines.
 */
1981 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1982 void __user *buffer, size_t *lenp, loff_t *ppos)
1985 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1986 if (flush_delay < 0)
1988 fib6_run_gc((unsigned long)flush_delay);
/*
 * /proc/sys/net/ipv6/route/* sysctl table: the cache-flush trigger, dst
 * garbage-collection tuning knobs and MTU/advmss limits.  Time-valued
 * entries use proc_dointvec_jiffies + sysctl_jiffies so userspace reads
 * and writes seconds while the kernel stores jiffies.
 * NOTE(review): gc_elasticity and min_adv_mss are plain integers (see
 * their initialisers at the top of the file), yet use the jiffies
 * handler/strategy here -- this looks wrong and was changed to plain
 * proc_dointvec in later kernels; confirm before relying on it.
 * NOTE(review): incomplete excerpt -- per-entry braces, .mode fields and
 * the terminating empty entry are elided lines.
 */
1994 ctl_table ipv6_route_table[] = {
1996 .ctl_name = NET_IPV6_ROUTE_FLUSH,
1997 .procname = "flush",
1998 .data = &flush_delay,
1999 .maxlen = sizeof(int),
2001 .proc_handler = &ipv6_sysctl_rtcache_flush
2004 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2005 .procname = "gc_thresh",
2006 .data = &ip6_dst_ops.gc_thresh,
2007 .maxlen = sizeof(int),
2009 .proc_handler = &proc_dointvec,
2012 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2013 .procname = "max_size",
2014 .data = &ip6_rt_max_size,
2015 .maxlen = sizeof(int),
2017 .proc_handler = &proc_dointvec,
2020 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2021 .procname = "gc_min_interval",
2022 .data = &ip6_rt_gc_min_interval,
2023 .maxlen = sizeof(int),
2025 .proc_handler = &proc_dointvec_jiffies,
2026 .strategy = &sysctl_jiffies,
2029 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2030 .procname = "gc_timeout",
2031 .data = &ip6_rt_gc_timeout,
2032 .maxlen = sizeof(int),
2034 .proc_handler = &proc_dointvec_jiffies,
2035 .strategy = &sysctl_jiffies,
2038 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2039 .procname = "gc_interval",
2040 .data = &ip6_rt_gc_interval,
2041 .maxlen = sizeof(int),
2043 .proc_handler = &proc_dointvec_jiffies,
2044 .strategy = &sysctl_jiffies,
2047 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2048 .procname = "gc_elasticity",
2049 .data = &ip6_rt_gc_elasticity,
2050 .maxlen = sizeof(int),
2052 .proc_handler = &proc_dointvec_jiffies,
2053 .strategy = &sysctl_jiffies,
2056 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2057 .procname = "mtu_expires",
2058 .data = &ip6_rt_mtu_expires,
2059 .maxlen = sizeof(int),
2061 .proc_handler = &proc_dointvec_jiffies,
2062 .strategy = &sysctl_jiffies,
2065 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2066 .procname = "min_adv_mss",
2067 .data = &ip6_rt_min_advmss,
2068 .maxlen = sizeof(int),
2070 .proc_handler = &proc_dointvec_jiffies,
2071 .strategy = &sysctl_jiffies,
/*
 * ip6_route_init() - boot-time initialisation: create the ip6_dst_cache
 * slab (fatal if it cannot be created) and register the /proc/net
 * "ipv6_route" and "rt6_stats" entries.
 * NOTE(review): incomplete excerpt -- remaining kmem_cache_create
 * arguments, the NULL check on 'p' before the ->owner assignment, and
 * closing braces are elided lines.
 */
2078 void __init ip6_route_init(void)
2080 struct proc_dir_entry *p;
2082 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2083 sizeof(struct rt6_info),
2084 0, SLAB_HWCACHE_ALIGN,
2086 if (!ip6_dst_ops.kmem_cachep)
2087 panic("cannot create ip6_dst_cache");
2090 #ifdef CONFIG_PROC_FS
2091 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2093 p->owner = THIS_MODULE;
2095 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
/*
 * ip6_route_cleanup() - module-unload teardown: remove the /proc/net
 * entries registered in ip6_route_init() and destroy the route slab.
 * NOTE(review): incomplete excerpt -- additional teardown between the
 * proc removal and the slab destruction is elided from this listing.
 */
2102 void __exit ip6_route_cleanup(void)
2104 #ifdef CONFIG_PROC_FS
2105 proc_net_remove("ipv6_route");
2106 proc_net_remove("rt6_stats");
2113 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);