2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
53 #include <linux/rtnetlink.h>
57 #include <asm/uaccess.h>
60 #include <linux/sysctl.h>
63 /* Set to 3 to get tracing. */
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
71 #define RT6_TRACE(x...) do { ; } while (0)
/* Garbage-collection and PMTU tunables for the IPv6 routing cache.
 * Time values are in jiffies.  NOTE(review): this chunk is a sampled
 * extract of the original file -- interior lines are missing throughout,
 * so code in this file is documented but deliberately left untouched. */
75 static int ip6_rt_max_size = 4096;
76 static int ip6_rt_gc_min_interval = HZ / 2;
77 static int ip6_rt_gc_timeout = 60*HZ;
78 int ip6_rt_gc_interval = 30*HZ;
79 static int ip6_rt_gc_elasticity = 9;
80 static int ip6_rt_mtu_expires = 10*60*HZ;
/* Minimum advertised MSS: min MTU less TCP (20) and IPv6 (40) headers. */
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void ip6_dst_destroy(struct dst_entry *);
87 static void ip6_dst_ifdown(struct dst_entry *, int how);
88 static int ip6_dst_gc(void);
90 static int ip6_pkt_discard(struct sk_buff *skb);
91 static int ip6_pkt_discard_out(struct sk_buff **pskb);
92 static void ip6_link_failure(struct sk_buff *skb);
93 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
/* dst_ops vtable hooking the generic dst cache into the IPv6-specific
 * callbacks defined below.  NOTE(review): some initializer lines
 * (.family, .gc, closing "};") are elided in this extract. */
95 static struct dst_ops ip6_dst_ops = {
97 .protocol = __constant_htons(ETH_P_IPV6),
100 .check = ip6_dst_check,
101 .destroy = ip6_dst_destroy,
102 .ifdown = ip6_dst_ifdown,
103 .negative_advice = ip6_negative_advice,
104 .link_failure = ip6_link_failure,
105 .update_pmtu = ip6_rt_update_pmtu,
106 .entry_size = sizeof(struct rt6_info),
/* Sentinel "no route" entry returned by failed lookups: a reject route
 * (RTF_REJECT|RTF_NONEXTHOP) on the loopback device whose handlers
 * discard packets with -ENETUNREACH.  Static refcounts of 1 keep it
 * from ever being freed; .path points back at itself. */
109 struct rt6_info ip6_null_entry = {
112 .__refcnt = ATOMIC_INIT(1),
114 .dev = &loopback_dev,
116 .error = -ENETUNREACH,
117 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
118 .input = ip6_pkt_discard,
119 .output = ip6_pkt_discard_out,
121 .path = (struct dst_entry*)&ip6_null_entry,
124 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
/* Worst possible metric so any real route wins over the sentinel. */
125 .rt6i_metric = ~(u32) 0,
126 .rt6i_ref = ATOMIC_INIT(1),
/* Root node of the IPv6 FIB radix tree.  Its leaf is the null entry so
 * that lookups on an empty table always resolve to the reject route. */
129 struct fib6_node ip6_routing_table = {
130 .leaf = &ip6_null_entry,
131 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
134 /* Protects all the ip6 fib */
136 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
139 /* allocate dst with ip6_dst_ops */
/* Thin typed wrapper: every rt6_info in this file comes from here. */
140 static __inline__ struct rt6_info *ip6_dst_alloc(void)
142 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
/* dst_ops.destroy: detach the route's inet6_dev reference when the dst
 * is finally freed.  (The matching in6_dev_put() is presumably in the
 * elided lines -- TODO confirm against the full source.) */
145 static void ip6_dst_destroy(struct dst_entry *dst)
147 struct rt6_info *rt = (struct rt6_info *)dst;
148 struct inet6_dev *idev = rt->rt6i_idev;
151 rt->rt6i_idev = NULL;
/* dst_ops.ifdown: when the underlying device disappears, re-point the
 * route's idev at loopback so the cached dst stays usable until all
 * references are dropped. */
156 static void ip6_dst_ifdown(struct dst_entry *dst, int how)
158 struct rt6_info *rt = (struct rt6_info *)dst;
159 struct inet6_dev *idev = rt->rt6i_idev;
161 if (idev != NULL && idev->dev != &loopback_dev) {
162 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
163 if (loopback_idev != NULL) {
164 rt->rt6i_idev = loopback_idev;
171 * Route lookup. Any rt6_lock is implied.
/* Walk the rt6_info chain hanging off a fib node and pick the entry
 * whose device matches oif; loopback entries are tracked in 'local' as
 * a fallback.  Falls through to &ip6_null_entry when a strict match
 * finds nothing.  Caller must hold rt6_lock. */
174 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
178 struct rt6_info *local = NULL;
179 struct rt6_info *sprt;
182 for (sprt = rt; sprt; sprt = sprt->u.next) {
183 struct net_device *dev = sprt->rt6i_dev;
184 if (dev->ifindex == oif)
186 if (dev->flags & IFF_LOOPBACK) {
187 if (sprt->rt6i_idev == NULL ||
188 sprt->rt6i_idev->dev->ifindex != oif) {
191 if (local && (!oif ||
192 local->rt6i_idev->dev->ifindex == oif))
203 return &ip6_null_entry;
209 * pointer to the last default router chosen. BH is disabled locally.
211 struct rt6_info *rt6_dflt_pointer;
/* Serializes reads/writes of rt6_dflt_pointer (taken with BH off). */
212 spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
/* Forget the cached default-router choice: unconditionally when
 * rt == NULL, otherwise only if the cache currently points at rt. */
214 void rt6_reset_dflt_pointer(struct rt6_info *rt)
216 spin_lock_bh(&rt6_dflt_lock);
217 if (rt == NULL || rt == rt6_dflt_pointer) {
218 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
219 rt6_dflt_pointer = NULL;
221 spin_unlock_bh(&rt6_dflt_lock);
224 /* Default Router Selection (RFC 2461 6.3.6) */
/* Default Router Selection (RFC 2461 6.3.6): scan the chain of default
 * routes, score each candidate by neighbour reachability (the NUD-state
 * switch is largely elided here) and prefer the previously used router
 * when it is still (probably) reachable; otherwise round-robin.  Falls
 * back to any addrconf default route, then to ip6_null_entry. */
225 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
227 struct rt6_info *match = NULL;
228 struct rt6_info *sprt;
231 for (sprt = rt; sprt; sprt = sprt->u.next) {
232 struct neighbour *neigh;
237 sprt->rt6i_dev->ifindex == oif))
/* Skip routers whose route lifetime has already expired. */
240 if ((sprt->rt6i_flags & RTF_EXPIRES) &&
241 time_after(jiffies, sprt->rt6i_expires))
244 if (sprt == rt6_dflt_pointer)
247 if ((neigh = sprt->rt6i_nexthop) != NULL) {
248 read_lock_bh(&neigh->lock);
249 switch (neigh->nud_state) {
267 read_unlock_bh(&neigh->lock);
270 read_unlock_bh(&neigh->lock);
/* 'm' is the reachability score; 12 appears to be the "good
 * enough, stop searching" threshold -- TODO confirm against the
 * elided scoring code above. */
275 if (m > mpri || m >= 12) {
279 /* we choose the last default router if it
280 * is in (probably) reachable state.
281 * If route changed, we should do pmtu
282 * discovery. --yoshfuji
289 spin_lock(&rt6_dflt_lock);
292 * No default routers are known to be reachable.
/* Round-robin: resume the scan after the previously chosen router. */
295 if (rt6_dflt_pointer) {
296 for (sprt = rt6_dflt_pointer->u.next;
297 sprt; sprt = sprt->u.next) {
298 if (sprt->u.dst.obsolete <= 0 &&
299 sprt->u.dst.error == 0) {
306 sprt = sprt->u.next) {
307 if (sprt->u.dst.obsolete <= 0 &&
308 sprt->u.dst.error == 0) {
312 if (sprt == rt6_dflt_pointer)
319 if (rt6_dflt_pointer != match)
320 RT6_TRACE("changed default router: %p->%p\n",
321 rt6_dflt_pointer, match);
322 rt6_dflt_pointer = match;
324 spin_unlock(&rt6_dflt_lock);
328 * Last Resort: if no default routers found,
329 * use addrconf default route.
330 * We don't record this route.
332 for (sprt = ip6_routing_table.leaf;
333 sprt; sprt = sprt->u.next) {
334 if ((sprt->rt6i_flags & RTF_DEFAULT) &&
337 sprt->rt6i_dev->ifindex == oif))) {
343 /* no default route. give up. */
344 match = &ip6_null_entry;
/* Public lookup: find the best route for (daddr, saddr, oif) under
 * rt6_lock and return it with a reference held (via dst_hold); the
 * caller must dst_release() it.  Error entries are released before the
 * elided return path -- TODO confirm what is returned in that case. */
351 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
354 struct fib6_node *fn;
357 read_lock_bh(&rt6_lock);
358 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
359 rt = rt6_device_match(fn->leaf, oif, strict);
360 dst_hold(&rt->u.dst);
362 read_unlock_bh(&rt6_lock);
364 rt->u.dst.lastuse = jiffies;
365 if (rt->u.dst.error == 0)
367 dst_release(&rt->u.dst);
371 /* ip6_ins_rt is called with FREE rt6_lock.
372 It takes new route entry, the addition fails by any reason the
373 route is freed. In any case, if caller does not hold it, it may
/* Insert rt into the FIB under the write lock; returns fib6_add()'s
 * result (0 on success, negative errno otherwise). */
377 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
381 write_lock_bh(&rt6_lock);
382 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
383 write_unlock_bh(&rt6_lock);
388 /* No rt6_lock! If COW failed, the function returns dead route entry
389 with dst->error set to errno value.
/* Clone ort into a host (/128) RTF_CACHE entry for daddr, resolve its
 * nexthop neighbour, and insert it into the FIB.  On allocation
 * failure the ip6_null_entry sentinel is returned (held). */
392 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
393 struct in6_addr *saddr)
402 rt = ip6_rt_copy(ort);
405 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
/* Non-gatewayed (on-link) route: the destination is its own nexthop. */
407 if (!(rt->rt6i_flags&RTF_GATEWAY))
408 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
410 rt->rt6i_dst.plen = 128;
411 rt->rt6i_flags |= RTF_CACHE;
412 rt->u.dst.flags |= DST_HOST;
414 #ifdef CONFIG_IPV6_SUBTREES
415 if (rt->rt6i_src.plen && saddr) {
416 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
417 rt->rt6i_src.plen = 128;
421 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
423 dst_hold(&rt->u.dst);
425 err = ip6_ins_rt(rt, NULL, NULL);
429 rt->u.dst.error = err;
433 dst_hold(&ip6_null_entry.u.dst);
434 return &ip6_null_entry;
/* When a strict lookup dead-ended in ip6_null_entry, walk back up the
 * fib tree towards the root looking for a parent node that carries
 * route info; used by ip6_route_input/ip6_route_output below.
 * NOTE(review): the macro's continuation lines are partially elided in
 * this extract. */
437 #define BACKTRACK() \
438 if (rt == &ip6_null_entry && strict) { \
439 while ((fn = fn->parent) != NULL) { \
440 if (fn->fn_flags & RTN_ROOT) { \
441 dst_hold(&rt->u.dst); \
444 if (fn->fn_flags & RTN_RTINFO) \
/* Attach a route to an incoming skb (sets skb->dst).  Strict device
 * matching is used for multicast/link-local destinations.  Connected
 * routes without a nexthop are COWed into per-destination cache
 * entries; an -EEXIST from the COW means somebody raced us, so we
 * retry ('attempts' bound is set in elided code). */
450 void ip6_route_input(struct sk_buff *skb)
452 struct fib6_node *fn;
457 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
460 read_lock_bh(&rt6_lock);
462 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
463 &skb->nh.ipv6h->saddr);
468 if ((rt->rt6i_flags & RTF_CACHE)) {
469 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
471 dst_hold(&rt->u.dst);
475 rt = rt6_device_match(rt, skb->dev->ifindex, 0);
478 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
479 read_unlock_bh(&rt6_lock);
481 rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
482 &skb->nh.ipv6h->saddr);
484 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
486 /* Race condition! In the gap, when rt6_lock was
487 released someone could insert this route. Relookup.
489 dst_release(&rt->u.dst);
492 dst_hold(&rt->u.dst);
495 read_unlock_bh(&rt6_lock);
497 rt->u.dst.lastuse = jiffies;
499 skb->dst = (struct dst_entry *) rt;
/* Output-path lookup keyed by flowi: like ip6_route_input but for
 * locally generated traffic.  Default routes go through the RFC 2461
 * best-default-router selection; the same COW-and-retry dance handles
 * connected routes without a nexthop. */
502 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
504 struct fib6_node *fn;
509 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
512 read_lock_bh(&rt6_lock);
514 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
519 if ((rt->rt6i_flags & RTF_CACHE)) {
520 rt = rt6_device_match(rt, fl->oif, strict);
522 dst_hold(&rt->u.dst);
/* Only run router selection for RA/addrconf-priority defaults. */
525 if (rt->rt6i_flags & RTF_DEFAULT) {
526 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
527 rt = rt6_best_dflt(rt, fl->oif);
529 rt = rt6_device_match(rt, fl->oif, strict);
533 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
534 read_unlock_bh(&rt6_lock);
536 rt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
538 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
541 /* Race condition! In the gap, when rt6_lock was
542 released someone could insert this route. Relookup.
544 dst_release(&rt->u.dst);
547 dst_hold(&rt->u.dst);
550 read_unlock_bh(&rt6_lock);
552 rt->u.dst.lastuse = jiffies;
559 * Destination cache support functions
/* dst_ops.check: a cached dst is still valid while its fib node's
 * serial number matches the cookie recorded at lookup time. */
562 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
566 rt = (struct rt6_info *) dst;
568 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/* dst_ops.negative_advice: on upper-layer feedback that the route is
 * bad, drop it from the table if it is only a cache clone. */
575 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
577 struct rt6_info *rt = (struct rt6_info *) dst;
580 if (rt->rt6i_flags & RTF_CACHE)
581 ip6_del_rt(rt, NULL, NULL);
/* dst_ops.link_failure: report unreachability to the sender, then
 * expire the cache clone immediately, or invalidate the fib node's
 * serial number for default routes so cached dsts get re-checked. */
588 static void ip6_link_failure(struct sk_buff *skb)
592 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
594 rt = (struct rt6_info *) skb->dst;
596 if (rt->rt6i_flags&RTF_CACHE) {
597 dst_set_expires(&rt->u.dst, 0);
598 rt->rt6i_flags |= RTF_EXPIRES;
599 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
600 rt->rt6i_node->fn_sernum = -1;
/* dst_ops.update_pmtu: record a smaller PMTU on host (/128) routes
 * only; values below IPV6_MIN_MTU are handled by the elided branch
 * before the metric is written. */
604 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
606 struct rt6_info *rt6 = (struct rt6_info*)dst;
608 if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
609 rt6->rt6i_flags |= RTF_MODIFIED;
610 if (mtu < IPV6_MIN_MTU)
612 dst->metrics[RTAX_MTU-1] = mtu;
616 /* Protected by rt6_lock. */
/* Singly linked list of ndisc-allocated dsts awaiting ndisc_dst_gc(). */
617 static struct dst_entry *ndisc_dst_gc_list;
618 static int ipv6_get_mtu(struct net_device *dev);
/* Derive the advertised TCP MSS from a path MTU: subtract IPv6+TCP
 * headers, clamp below at the sysctl minimum and above at the maximal
 * non-jumbo payload. */
620 static inline unsigned int ipv6_advmss(unsigned int mtu)
622 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
624 if (mtu < ip6_rt_min_advmss)
625 mtu = ip6_rt_min_advmss;
628 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
629 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
630 * IPV6_MAXPLEN is also valid and means: "any MSS,
631 * rely only on pmtu discovery"
633 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/* Build a standalone dst for neighbour-discovery traffic: not inserted
 * into the FIB, but chained onto ndisc_dst_gc_list (under rt6_lock)
 * so ndisc_dst_gc() can reap it once the refcount drops.  If 'neigh'
 * is NULL a neighbour entry is resolved from 'addr' here. */
638 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
639 struct neighbour *neigh,
640 struct in6_addr *addr,
641 int (*output)(struct sk_buff **))
644 struct inet6_dev *idev = in6_dev_get(dev);
646 if (unlikely(idev == NULL))
649 rt = ip6_dst_alloc();
650 if (unlikely(rt == NULL))
657 neigh = ndisc_get_neigh(dev, addr);
660 rt->rt6i_idev = idev;
661 rt->rt6i_nexthop = neigh;
662 atomic_set(&rt->u.dst.__refcnt, 1);
663 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
664 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
665 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
666 rt->u.dst.output = output;
668 #if 0 /* there's no chance to use these for ndisc */
669 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
672 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
673 rt->rt6i_dst.plen = 128;
676 write_lock_bh(&rt6_lock);
677 rt->u.dst.next = ndisc_dst_gc_list;
678 ndisc_dst_gc_list = &rt->u.dst;
679 write_unlock_bh(&rt6_lock);
/* Make sure the fib GC timer is running so these entries get reaped. */
681 fib6_force_start_gc();
684 return (struct dst_entry *)rt;
/* Reap unreferenced entries from ndisc_dst_gc_list; '*more' presumably
 * reports whether entries remain -- TODO confirm, the unlink/free body
 * of the loop is elided in this extract. */
687 int ndisc_dst_gc(int *more)
689 struct dst_entry *dst, *next, **pprev;
693 pprev = &ndisc_dst_gc_list;
695 while ((dst = *pprev) != NULL) {
696 if (!atomic_read(&dst->__refcnt)) {
/* dst cache garbage collector: rate-limited by ip6_rt_gc_min_interval
 * unless the cache has outgrown ip6_rt_max_size.  'expire' adapts:
 * reset when under the threshold, decayed geometrically otherwise.
 * Returns nonzero while the cache is still over the size limit. */
709 static int ip6_dst_gc(void)
711 static unsigned expire = 30*HZ;
712 static unsigned long last_gc;
713 unsigned long now = jiffies;
715 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
716 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
722 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
723 expire = ip6_rt_gc_timeout>>1;
726 expire -= expire>>ip6_rt_gc_elasticity;
727 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
730 /* Clean host part of a prefix. Not necessary in radix tree,
731 but results in cleaner routing tables.
733 Remove it only when all the things will work!
/* Device MTU for IPv6 purposes: the per-device cnf.mtu6 when an
 * inet6_dev exists, else the IPV6_MIN_MTU floor. */
736 static int ipv6_get_mtu(struct net_device *dev)
738 int mtu = IPV6_MIN_MTU;
739 struct inet6_dev *idev;
741 idev = in6_dev_get(dev);
743 mtu = idev->cnf.mtu6;
/* Default hop limit: per-device cnf.hop_limit when available, else the
 * global ipv6_devconf value. */
749 static int ipv6_get_hoplimit(struct net_device *dev)
751 int hoplimit = ipv6_devconf.hop_limit;
752 struct inet6_dev *idev;
754 idev = in6_dev_get(dev);
756 hoplimit = idev->cnf.hop_limit;
/* Create and insert a route from an in6_rtmsg (ioctl or rtnetlink
 * path).  Validates prefix lengths, resolves the output device and
 * gateway, fills in default metrics (hoplimit/MTU/advmss), then hands
 * the entry to ip6_ins_rt().  On error, falls through to dst_free().
 * Returns 0 or a negative errno. */
766 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
771 struct rt6_info *rt = NULL;
772 struct net_device *dev = NULL;
773 struct inet6_dev *idev = NULL;
776 rta = (struct rtattr **) _rtattr;
778 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
/* Source-prefix routing only exists with CONFIG_IPV6_SUBTREES. */
780 #ifndef CONFIG_IPV6_SUBTREES
781 if (rtmsg->rtmsg_src_len)
784 if (rtmsg->rtmsg_ifindex) {
786 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
789 idev = in6_dev_get(dev);
794 if (rtmsg->rtmsg_metric == 0)
795 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
797 rt = ip6_dst_alloc();
802 rt->u.dst.obsolete = -1;
803 rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
804 if (nlh && (r = NLMSG_DATA(nlh))) {
805 rt->rt6i_protocol = r->rtm_protocol;
807 rt->rt6i_protocol = RTPROT_BOOT;
810 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
812 if (addr_type & IPV6_ADDR_MULTICAST)
813 rt->u.dst.input = ip6_mc_input;
815 rt->u.dst.input = ip6_forward;
817 rt->u.dst.output = ip6_output;
819 ipv6_addr_prefix(&rt->rt6i_dst.addr,
820 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
821 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
822 if (rt->rt6i_dst.plen == 128)
823 rt->u.dst.flags = DST_HOST;
825 #ifdef CONFIG_IPV6_SUBTREES
826 ipv6_addr_prefix(&rt->rt6i_src.addr,
827 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
828 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
831 rt->rt6i_metric = rtmsg->rtmsg_metric;
833 /* We cannot add true routes via loopback here,
834 they would result in kernel looping; promote them to reject routes
836 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
837 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
838 /* hold loopback dev/idev if we haven't done so. */
839 if (dev != &loopback_dev) {
846 idev = in6_dev_get(dev);
852 rt->u.dst.output = ip6_pkt_discard_out;
853 rt->u.dst.input = ip6_pkt_discard;
854 rt->u.dst.error = -ENETUNREACH;
855 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
/* Gateway validation: nexthop must normally be link-local unicast;
 * global-unicast gateways are allowed only via the recursive-lookup
 * exception described in ANK's comment below. */
859 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
860 struct in6_addr *gw_addr;
863 gw_addr = &rtmsg->rtmsg_gateway;
864 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
865 gwa_type = ipv6_addr_type(gw_addr);
867 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
868 struct rt6_info *grt;
870 /* IPv6 strictly inhibits using not link-local
871 addresses as nexthop address.
872 Otherwise, router will not able to send redirects.
873 It is very good, but in some (rare!) circumstances
874 (SIT, PtP, NBMA NOARP links) it is handy to allow
875 some exceptions. --ANK
878 if (!(gwa_type&IPV6_ADDR_UNICAST))
881 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
887 if (dev != grt->rt6i_dev) {
888 dst_release(&grt->u.dst);
893 idev = grt->rt6i_idev;
895 in6_dev_hold(grt->rt6i_idev);
/* A gateway must itself be reachable on-link, not via another gw. */
897 if (!(grt->rt6i_flags&RTF_GATEWAY))
899 dst_release(&grt->u.dst);
905 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
913 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
914 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
915 if (IS_ERR(rt->rt6i_nexthop)) {
916 err = PTR_ERR(rt->rt6i_nexthop);
917 rt->rt6i_nexthop = NULL;
922 rt->rt6i_flags = rtmsg->rtmsg_flags;
/* Copy user-supplied RTAX_* metrics from the netlink attribute. */
925 if (rta && rta[RTA_METRICS-1]) {
926 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
927 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
929 while (RTA_OK(attr, attrlen)) {
930 unsigned flavor = attr->rta_type;
932 if (flavor > RTAX_MAX) {
936 rt->u.dst.metrics[flavor-1] =
937 *(u32 *)RTA_DATA(attr);
939 attr = RTA_NEXT(attr, attrlen);
/* Fill in defaults for any metrics the caller left at zero. */
943 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
944 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
945 rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
946 IPV6_DEFAULT_MCASTHOPS;
948 rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
949 ipv6_get_hoplimit(dev);
952 if (!rt->u.dst.metrics[RTAX_MTU-1])
953 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
954 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
955 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
957 rt->rt6i_idev = idev;
958 return ip6_ins_rt(rt, nlh, _rtattr);
963 dst_free((struct dst_entry *) rt);
/* Remove rt from the FIB under the write lock, clearing the cached
 * default-router pointer first (the deleted route might be it), and
 * drop the caller's reference.  Returns fib6_del()'s result. */
967 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
971 write_lock_bh(&rt6_lock);
973 rt6_reset_dflt_pointer(NULL);
975 dst_release(&rt->u.dst);
977 err = fib6_del(rt, nlh, _rtattr);
978 write_unlock_bh(&rt6_lock);
/* Delete the route matching an in6_rtmsg: locate the exact fib node
 * for (dst, src) prefixes, then scan its leaf chain for an entry that
 * also matches the requested ifindex, gateway and metric.  The matched
 * entry is held and handed to ip6_del_rt() outside the read lock. */
983 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
985 struct fib6_node *fn;
989 read_lock_bh(&rt6_lock);
991 fn = fib6_locate(&ip6_routing_table,
992 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
993 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
996 for (rt = fn->leaf; rt; rt = rt->u.next) {
997 if (rtmsg->rtmsg_ifindex &&
998 (rt->rt6i_dev == NULL ||
999 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1001 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1002 ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1004 if (rtmsg->rtmsg_metric &&
1005 rtmsg->rtmsg_metric != rt->rt6i_metric)
1007 dst_hold(&rt->u.dst);
1008 read_unlock_bh(&rt6_lock);
1010 return ip6_del_rt(rt, nlh, _rtattr);
1013 read_unlock_bh(&rt6_lock);
/* Process an ICMPv6 redirect for 'dest' received from 'saddr' via
 * 'neigh': validate that the old route is gatewayed and that the
 * redirect really came from our current nexthop (checking all default
 * routers if needed), update the neighbour cache, then install a
 * cloned RTF_CACHE host route through the new nexthop and delete the
 * superseded cache entry. */
1021 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1022 struct neighbour *neigh, u8 *lladdr, int on_link)
1024 struct rt6_info *rt, *nrt;
1026 /* Locate old route to this destination. */
1027 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1032 if (neigh->dev != rt->rt6i_dev)
1036 * Current route is on-link; redirect is always invalid.
1038 * Seems, previous statement is not true. It could
1039 * be node, which looks for us as on-link (f.e. proxy ndisc)
1040 * But then router serving it might decide, that we should
1041 * know truth 8)8) --ANK (980726).
1043 if (!(rt->rt6i_flags&RTF_GATEWAY))
1047 * RFC 2461 specifies that redirects should only be
1048 * accepted if they come from the nexthop to the target.
1049 * Due to the way default routers are chosen, this notion
1050 * is a bit fuzzy and one might need to check all default
1053 if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
1054 if (rt->rt6i_flags & RTF_DEFAULT) {
1055 struct rt6_info *rt1;
1057 read_lock(&rt6_lock);
1058 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1059 if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
1060 dst_hold(&rt1->u.dst);
1061 dst_release(&rt->u.dst);
1062 read_unlock(&rt6_lock);
1067 read_unlock(&rt6_lock);
1069 if (net_ratelimit())
1070 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1071 "for redirect target\n");
1078 * We have finally decided to accept it.
1081 neigh_update(neigh, lladdr, NUD_STALE,
1082 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1083 NEIGH_UPDATE_F_OVERRIDE|
1084 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1085 NEIGH_UPDATE_F_ISROUTER))
1089 * Redirect received -> path was valid.
1090 * Look, redirects are sent only in response to data packets,
1091 * so that this nexthop apparently is reachable. --ANK
1093 dst_confirm(&rt->u.dst);
1095 /* Duplicate redirect: silently ignore. */
1096 if (neigh == rt->u.dst.neighbour)
1099 nrt = ip6_rt_copy(rt);
1103 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1105 nrt->rt6i_flags &= ~RTF_GATEWAY;
1107 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1108 nrt->rt6i_dst.plen = 128;
1109 nrt->u.dst.flags |= DST_HOST;
1111 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1112 nrt->rt6i_nexthop = neigh_clone(neigh);
1113 /* Reset pmtu, it may be better */
1114 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1115 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
1117 if (ip6_ins_rt(nrt, NULL, NULL))
1120 if (rt->rt6i_flags&RTF_CACHE) {
1121 ip6_del_rt(rt, NULL, NULL);
1126 dst_release(&rt->u.dst);
1131 * Handle ICMP "packet too big" messages
1132 * i.e. Path MTU discovery
/* Handle an ICMPv6 Packet Too Big: clamp the reported MTU at
 * IPV6_MIN_MTU per RFC 1981, then record the new PMTU either directly
 * on an existing RTF_CACHE host route or on a freshly COWed/cloned
 * host route, with a 10-minute expiry so PMTU increases can be
 * rediscovered. */
1135 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1136 struct net_device *dev, u32 pmtu)
1138 struct rt6_info *rt, *nrt;
1140 if (pmtu < IPV6_MIN_MTU) {
1141 if (net_ratelimit())
1142 printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1144 /* According to RFC1981, the PMTU is set to the IPv6 minimum
1145 link MTU if the node receives a Packet Too Big message
1146 reporting next-hop MTU that is less than the IPv6 minimum MTU.
1148 pmtu = IPV6_MIN_MTU;
1151 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
/* Only a decrease matters; equal-or-larger reported MTU is a no-op. */
1156 if (pmtu >= dst_pmtu(&rt->u.dst))
1159 /* New mtu received -> path was valid.
1160 They are sent only in response to data packets,
1161 so that this nexthop apparently is reachable. --ANK
1163 dst_confirm(&rt->u.dst);
1165 /* Host route. If it is static, it would be better
1166 not to override it, but add new one, so that
1167 when cache entry will expire old pmtu
1168 would return automatically.
1170 if (rt->rt6i_flags & RTF_CACHE) {
1171 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1172 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1173 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1178 Two cases are possible:
1179 1. It is connected route. Action: COW
1180 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1182 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1183 nrt = rt6_cow(rt, daddr, saddr);
1184 if (!nrt->u.dst.error) {
1185 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1186 /* According to RFC 1981, detecting PMTU increase shouldn't be
1187 happened within 5 mins, the recommended timer is 10 mins.
1188 Here this route expiration time is set to ip6_rt_mtu_expires
1189 which is 10 mins. After 10 mins the decreased pmtu is expired
1190 and detecting PMTU increase will be automatically happened.
1192 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1193 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1195 dst_release(&nrt->u.dst);
1197 nrt = ip6_rt_copy(rt);
1200 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1201 nrt->rt6i_dst.plen = 128;
1202 nrt->u.dst.flags |= DST_HOST;
1203 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1204 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1205 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1206 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1207 ip6_ins_rt(nrt, NULL, NULL);
1211 dst_release(&rt->u.dst);
1215 * Misc support functions
/* Allocate a new rt6_info initialized from 'ort': copies handlers,
 * metrics, device (with a dev_hold), idev, gateway and dst key, but
 * deliberately clears the expiry flag and zeroes the metric -- the
 * clone's lifetime/priority are managed by its creator. */
1218 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1220 struct rt6_info *rt = ip6_dst_alloc();
1223 rt->u.dst.input = ort->u.dst.input;
1224 rt->u.dst.output = ort->u.dst.output;
1226 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1227 rt->u.dst.dev = ort->u.dst.dev;
1229 dev_hold(rt->u.dst.dev);
1230 rt->rt6i_idev = ort->rt6i_idev;
1232 in6_dev_hold(rt->rt6i_idev);
1233 rt->u.dst.lastuse = jiffies;
1234 rt->rt6i_expires = 0;
1236 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1237 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1238 rt->rt6i_metric = 0;
1240 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1241 #ifdef CONFIG_IPV6_SUBTREES
1242 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
/* Find the default route through gateway 'addr' on 'dev' by scanning
 * the root node's leaf chain; returns it held, or the elided fallback
 * when no entry matches.  NOTE(review): taken under the write lock,
 * presumably because callers may modify the entry -- confirm. */
1248 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1250 struct rt6_info *rt;
1251 struct fib6_node *fn;
1253 fn = &ip6_routing_table;
1255 write_lock_bh(&rt6_lock);
1256 for (rt = fn->leaf; rt; rt=rt->u.next) {
1257 if (dev == rt->rt6i_dev &&
1258 ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1262 dst_hold(&rt->u.dst);
1263 write_unlock_bh(&rt6_lock);
/* Install a default route via 'gwaddr' on 'dev' (as learned from a
 * Router Advertisement) and return the freshly inserted, held entry
 * by re-looking it up. */
1267 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1268 struct net_device *dev)
1270 struct in6_rtmsg rtmsg;
1272 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1273 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1274 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1275 rtmsg.rtmsg_metric = 1024;
1276 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1278 rtmsg.rtmsg_ifindex = dev->ifindex;
1280 ip6_route_add(&rtmsg, NULL, NULL);
1281 return rt6_get_dflt_router(gwaddr, dev);
/* Delete every default route matching the selected flag set
 * (RTF_ALLONLINK for last-resort routes, else RA/addrconf defaults).
 * Each deletion drops the read lock, so the scan restarts from the
 * head afterwards (elided restart is implied by the second unlock). */
1284 void rt6_purge_dflt_routers(int last_resort)
1286 struct rt6_info *rt;
1290 flags = RTF_ALLONLINK;
1292 flags = RTF_DEFAULT | RTF_ADDRCONF;
1295 read_lock_bh(&rt6_lock);
1296 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1297 if (rt->rt6i_flags & flags) {
1298 dst_hold(&rt->u.dst);
1300 rt6_reset_dflt_pointer(NULL);
1302 read_unlock_bh(&rt6_lock);
1304 ip6_del_rt(rt, NULL, NULL);
1309 read_unlock_bh(&rt6_lock);
/* SIOCADDRT/SIOCDELRT ioctl entry point: CAP_NET_ADMIN only, copies
 * the in6_rtmsg from userspace and dispatches to add/del. */
1312 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1314 struct in6_rtmsg rtmsg;
1318 case SIOCADDRT: /* Add a route */
1319 case SIOCDELRT: /* Delete a route */
1320 if (!capable(CAP_NET_ADMIN))
1322 err = copy_from_user(&rtmsg, arg,
1323 sizeof(struct in6_rtmsg));
1330 err = ip6_route_add(&rtmsg, NULL, NULL);
1333 err = ip6_route_del(&rtmsg, NULL, NULL);
1347 * Drop the packet on the floor
/* Input handler for reject routes: count the no-route event and tell
 * the sender the destination is unreachable. */
1350 int ip6_pkt_discard(struct sk_buff *skb)
1352 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1353 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
/* Output-path counterpart of ip6_pkt_discard: point skb->dev at the
 * dst device (icmpv6_send needs it) and delegate. */
1358 int ip6_pkt_discard_out(struct sk_buff **pskb)
1360 (*pskb)->dev = (*pskb)->dst->dev;
1361 BUG_ON(!(*pskb)->dev);
1362 return ip6_pkt_discard(*pskb);
1366 * Allocate a dst for local (unicast / anycast) address.
/* Build the host route used for a local unicast/anycast address:
 * a /128 RTF_UP|RTF_NONEXTHOP route through the loopback device
 * (RTF_LOCAL is added under an elided condition -- presumably the
 * unicast/anycast distinction; TODO confirm).  Returns the route or
 * ERR_PTR(-ENOMEM). */
1369 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1370 const struct in6_addr *addr,
1373 struct rt6_info *rt = ip6_dst_alloc();
1376 return ERR_PTR(-ENOMEM);
1378 dev_hold(&loopback_dev);
1381 rt->u.dst.flags = DST_HOST;
1382 rt->u.dst.input = ip6_input;
1383 rt->u.dst.output = ip6_output;
1384 rt->rt6i_dev = &loopback_dev;
1385 rt->rt6i_idev = idev;
1386 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1387 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
1388 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
1389 rt->u.dst.obsolete = -1;
1391 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1393 rt->rt6i_flags |= RTF_LOCAL;
1394 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1395 if (rt->rt6i_nexthop == NULL) {
1396 dst_free((struct dst_entry *) rt);
1397 return ERR_PTR(-ENOMEM);
1400 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1401 rt->rt6i_dst.plen = 128;
1403 atomic_set(&rt->u.dst.__refcnt, 1);
/* fib6_clean_tree callback: select for deletion every route on the
 * given device (or all routes when arg == NULL), never the sentinel. */
1408 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1410 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1411 rt != &ip6_null_entry) {
1412 RT6_TRACE("deleted by ifdown %p\n", rt);
/* Device-down hook: purge all routes via 'dev' from the FIB. */
1418 void rt6_ifdown(struct net_device *dev)
1420 write_lock_bh(&rt6_lock);
1421 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1422 write_unlock_bh(&rt6_lock);
/* Argument bundle for the rt6_mtu_change_route tree walk below
 * (device whose MTU changed plus its new MTU; mtu field elided). */
1425 struct rt6_mtu_change_arg
1427 struct net_device *dev;
/* fib6_clean_tree callback for an administrative device-MTU change:
 * update unlocked RTAX_MTU metrics on routes via arg->dev, both for
 * decreases and for increases where the old route PMTU equalled the
 * device MTU (i.e. the device was the path bottleneck).  ADVMSS is
 * recomputed to match. */
1431 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1433 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1434 struct inet6_dev *idev;
1436 /* In IPv6 pmtu discovery is not optional,
1437 so that RTAX_MTU lock cannot disable it.
1438 We still use this lock to block changes
1439 caused by addrconf/ndisc.
1442 idev = __in6_dev_get(arg->dev);
1446 /* For administrative MTU increase, there is no way to discover
1447 IPv6 PMTU increase, so PMTU increase should be updated here.
1448 Since RFC 1981 doesn't include administrative MTU increase
1449 update PMTU increase is a MUST. (i.e. jumbo frame)
1452 If new MTU is less than route PMTU, this new MTU will be the
1453 lowest MTU in the path, update the route PMTU to reflect PMTU
1454 decreases; if new MTU is greater than route PMTU, and the
1455 old MTU is the lowest MTU in the path, update the route PMTU
1456 to reflect the increase. In this case if the other nodes' MTU
1457 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1460 if (rt->rt6i_dev == arg->dev &&
1461 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1462 (dst_pmtu(&rt->u.dst) > arg->mtu ||
1463 (dst_pmtu(&rt->u.dst) < arg->mtu &&
1464 dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
1465 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1466 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
/* Walk the whole FIB applying rt6_mtu_change_route for a device whose
 * MTU was changed administratively. */
1470 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1472 struct rt6_mtu_change_arg arg;
1476 read_lock_bh(&rt6_lock);
1477 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1478 read_unlock_bh(&rt6_lock);
/* Translate an rtnetlink rtmsg + attribute vector into the legacy
 * in6_rtmsg used by ip6_route_add/del.  Each attribute's length is
 * validated before copying; returns nonzero (via the elided error
 * paths) on malformed input. */
1481 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1482 struct in6_rtmsg *rtmsg)
1484 memset(rtmsg, 0, sizeof(*rtmsg));
1486 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1487 rtmsg->rtmsg_src_len = r->rtm_src_len;
1488 rtmsg->rtmsg_flags = RTF_UP;
1489 if (r->rtm_type == RTN_UNREACHABLE)
1490 rtmsg->rtmsg_flags |= RTF_REJECT;
1492 if (rta[RTA_GATEWAY-1]) {
1493 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1495 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1496 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1498 if (rta[RTA_DST-1]) {
1499 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1501 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1503 if (rta[RTA_SRC-1]) {
1504 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1506 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1508 if (rta[RTA_OIF-1]) {
1509 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1511 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1513 if (rta[RTA_PRIORITY-1]) {
1514 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1516 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
/* rtnetlink RTM_DELROUTE handler: convert and delegate to
 * ip6_route_del(). */
1521 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1523 struct rtmsg *r = NLMSG_DATA(nlh);
1524 struct in6_rtmsg rtmsg;
1526 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1528 return ip6_route_del(&rtmsg, nlh, arg);
/* rtnetlink RTM_NEWROUTE handler: convert and delegate to
 * ip6_route_add(). */
1531 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1533 struct rtmsg *r = NLMSG_DATA(nlh);
1534 struct in6_rtmsg rtmsg;
1536 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1538 return ip6_route_add(&rtmsg, nlh, arg);
/* State carried through an rtnetlink route-table dump (skb being
 * filled plus the netlink resume callback). */
1541 struct rt6_rtnl_dump_arg
1543 struct sk_buff *skb;
1544 struct netlink_callback *cb;
1547 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1548 struct in6_addr *dst,
1549 struct in6_addr *src,
1551 int type, u32 pid, u32 seq,
1552 struct nlmsghdr *in_nlh, int prefix)
1555 struct nlmsghdr *nlh;
1556 unsigned char *b = skb->tail;
1557 struct rta_cacheinfo ci;
1559 if (prefix) { /* user wants prefix routes only */
1560 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1561 /* success since this is not a prefix route */
1566 if (!pid && in_nlh) {
1567 pid = in_nlh->nlmsg_pid;
1570 nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1571 rtm = NLMSG_DATA(nlh);
1572 rtm->rtm_family = AF_INET6;
1573 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1574 rtm->rtm_src_len = rt->rt6i_src.plen;
1576 rtm->rtm_table = RT_TABLE_MAIN;
1577 if (rt->rt6i_flags&RTF_REJECT)
1578 rtm->rtm_type = RTN_UNREACHABLE;
1579 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1580 rtm->rtm_type = RTN_LOCAL;
1582 rtm->rtm_type = RTN_UNICAST;
1584 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1585 rtm->rtm_protocol = rt->rt6i_protocol;
1586 if (rt->rt6i_flags&RTF_DYNAMIC)
1587 rtm->rtm_protocol = RTPROT_REDIRECT;
1588 else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1589 rtm->rtm_protocol = RTPROT_KERNEL;
1590 else if (rt->rt6i_flags&RTF_DEFAULT)
1591 rtm->rtm_protocol = RTPROT_RA;
1593 if (rt->rt6i_flags&RTF_CACHE)
1594 rtm->rtm_flags |= RTM_F_CLONED;
1597 RTA_PUT(skb, RTA_DST, 16, dst);
1598 rtm->rtm_dst_len = 128;
1599 } else if (rtm->rtm_dst_len)
1600 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1601 #ifdef CONFIG_IPV6_SUBTREES
1603 RTA_PUT(skb, RTA_SRC, 16, src);
1604 rtm->rtm_src_len = 128;
1605 } else if (rtm->rtm_src_len)
1606 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1609 RTA_PUT(skb, RTA_IIF, 4, &iif);
1611 struct in6_addr saddr_buf;
1612 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1613 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1615 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1616 goto rtattr_failure;
1617 if (rt->u.dst.neighbour)
1618 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1620 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1621 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1622 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1623 if (rt->rt6i_expires)
1624 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1627 ci.rta_used = rt->u.dst.__use;
1628 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1629 ci.rta_error = rt->u.dst.error;
1633 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1634 nlh->nlmsg_len = skb->tail - b;
1639 skb_trim(skb, b - skb->data);
1643 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1645 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1648 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1649 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1650 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1654 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1655 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1659 static int fib6_dump_node(struct fib6_walker_t *w)
1662 struct rt6_info *rt;
1664 for (rt = w->leaf; rt; rt = rt->u.next) {
1665 res = rt6_dump_route(rt, w->args);
1667 /* Frame is full, suspend walking */
1677 static void fib6_dump_end(struct netlink_callback *cb)
1679 struct fib6_walker_t *w = (void*)cb->args[0];
1683 fib6_walker_unlink(w);
1687 cb->done = (void*)cb->args[1];
1692 static int fib6_dump_done(struct netlink_callback *cb)
1695 return cb->done(cb);
1698 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1700 struct rt6_rtnl_dump_arg arg;
1701 struct fib6_walker_t *w;
1707 w = (void*)cb->args[0];
1711 * 1. hook callback destructor.
1713 cb->args[1] = (long)cb->done;
1714 cb->done = fib6_dump_done;
1717 * 2. allocate and initialize walker.
1719 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1722 RT6_TRACE("dump<%p", w);
1723 memset(w, 0, sizeof(*w));
1724 w->root = &ip6_routing_table;
1725 w->func = fib6_dump_node;
1727 cb->args[0] = (long)w;
1728 read_lock_bh(&rt6_lock);
1730 read_unlock_bh(&rt6_lock);
1733 read_lock_bh(&rt6_lock);
1734 res = fib6_walk_continue(w);
1735 read_unlock_bh(&rt6_lock);
1738 if (res <= 0 && skb->len == 0)
1739 RT6_TRACE("%p>dump end\n", w);
1741 res = res < 0 ? res : skb->len;
1742 /* res < 0 is an error. (really, impossible)
1743 res == 0 means that dump is complete, but skb still can contain data.
1744 res > 0 dump is not complete, but frame is full.
1746 /* Destroy walker, if dump of this table is complete. */
1752 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1754 struct rtattr **rta = arg;
1757 struct sk_buff *skb;
1759 struct rt6_info *rt;
1761 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1765 /* Reserve room for dummy headers, this skb can pass
1766 through good chunk of routing engine.
1768 skb->mac.raw = skb->data;
1769 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1771 memset(&fl, 0, sizeof(fl));
1773 ipv6_addr_copy(&fl.fl6_src,
1774 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1776 ipv6_addr_copy(&fl.fl6_dst,
1777 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1780 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1783 struct net_device *dev;
1784 dev = __dev_get_by_index(iif);
1793 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1795 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1797 skb->dst = &rt->u.dst;
1799 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1800 err = rt6_fill_node(skb, rt,
1801 &fl.fl6_dst, &fl.fl6_src,
1803 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1804 nlh->nlmsg_seq, nlh, 0);
1810 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1820 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1822 struct sk_buff *skb;
1823 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1825 skb = alloc_skb(size, gfp_any());
1827 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1830 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1832 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1835 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1836 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1843 #ifdef CONFIG_PROC_FS
1845 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
1856 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1858 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1861 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1866 if (arg->len >= arg->length)
1869 for (i=0; i<16; i++) {
1870 sprintf(arg->buffer + arg->len, "%02x",
1871 rt->rt6i_dst.addr.s6_addr[i]);
1874 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1877 #ifdef CONFIG_IPV6_SUBTREES
1878 for (i=0; i<16; i++) {
1879 sprintf(arg->buffer + arg->len, "%02x",
1880 rt->rt6i_src.addr.s6_addr[i]);
1883 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1886 sprintf(arg->buffer + arg->len,
1887 "00000000000000000000000000000000 00 ");
1891 if (rt->rt6i_nexthop) {
1892 for (i=0; i<16; i++) {
1893 sprintf(arg->buffer + arg->len, "%02x",
1894 rt->rt6i_nexthop->primary_key[i]);
1898 sprintf(arg->buffer + arg->len,
1899 "00000000000000000000000000000000");
1902 arg->len += sprintf(arg->buffer + arg->len,
1903 " %08x %08x %08x %08x %8s\n",
1904 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1905 rt->u.dst.__use, rt->rt6i_flags,
1906 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1910 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1912 struct rt6_proc_arg arg;
1913 arg.buffer = buffer;
1914 arg.offset = offset;
1915 arg.length = length;
1919 read_lock_bh(&rt6_lock);
1920 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1921 read_unlock_bh(&rt6_lock);
1925 *start += offset % RT6_INFO_LEN;
1927 arg.len -= offset % RT6_INFO_LEN;
1929 if (arg.len > length)
1937 extern struct rt6_statistics rt6_stats;
1939 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1941 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1942 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1943 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1944 rt6_stats.fib_rt_cache,
1945 atomic_read(&ip6_dst_ops.entries),
1946 rt6_stats.fib_discarded_routes);
1951 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1953 return single_open(file, rt6_stats_seq_show, NULL);
1956 static struct file_operations rt6_stats_seq_fops = {
1957 .owner = THIS_MODULE,
1958 .open = rt6_stats_seq_open,
1960 .llseek = seq_lseek,
1961 .release = single_release,
1963 #endif /* CONFIG_PROC_FS */
1965 #ifdef CONFIG_SYSCTL
1967 static int flush_delay;
1970 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1971 void __user *buffer, size_t *lenp, loff_t *ppos)
1974 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1975 if (flush_delay < 0)
1977 fib6_run_gc((unsigned long)flush_delay);
1983 ctl_table ipv6_route_table[] = {
1985 .ctl_name = NET_IPV6_ROUTE_FLUSH,
1986 .procname = "flush",
1987 .data = &flush_delay,
1988 .maxlen = sizeof(int),
1990 .proc_handler = &ipv6_sysctl_rtcache_flush
1993 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
1994 .procname = "gc_thresh",
1995 .data = &ip6_dst_ops.gc_thresh,
1996 .maxlen = sizeof(int),
1998 .proc_handler = &proc_dointvec,
2001 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2002 .procname = "max_size",
2003 .data = &ip6_rt_max_size,
2004 .maxlen = sizeof(int),
2006 .proc_handler = &proc_dointvec,
2009 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2010 .procname = "gc_min_interval",
2011 .data = &ip6_rt_gc_min_interval,
2012 .maxlen = sizeof(int),
2014 .proc_handler = &proc_dointvec_jiffies,
2015 .strategy = &sysctl_jiffies,
2018 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2019 .procname = "gc_timeout",
2020 .data = &ip6_rt_gc_timeout,
2021 .maxlen = sizeof(int),
2023 .proc_handler = &proc_dointvec_jiffies,
2024 .strategy = &sysctl_jiffies,
2027 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2028 .procname = "gc_interval",
2029 .data = &ip6_rt_gc_interval,
2030 .maxlen = sizeof(int),
2032 .proc_handler = &proc_dointvec_jiffies,
2033 .strategy = &sysctl_jiffies,
2036 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2037 .procname = "gc_elasticity",
2038 .data = &ip6_rt_gc_elasticity,
2039 .maxlen = sizeof(int),
2041 .proc_handler = &proc_dointvec_jiffies,
2042 .strategy = &sysctl_jiffies,
2045 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2046 .procname = "mtu_expires",
2047 .data = &ip6_rt_mtu_expires,
2048 .maxlen = sizeof(int),
2050 .proc_handler = &proc_dointvec_jiffies,
2051 .strategy = &sysctl_jiffies,
2054 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2055 .procname = "min_adv_mss",
2056 .data = &ip6_rt_min_advmss,
2057 .maxlen = sizeof(int),
2059 .proc_handler = &proc_dointvec_jiffies,
2060 .strategy = &sysctl_jiffies,
2067 void __init ip6_route_init(void)
2069 struct proc_dir_entry *p;
2071 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2072 sizeof(struct rt6_info),
2073 0, SLAB_HWCACHE_ALIGN,
2075 if (!ip6_dst_ops.kmem_cachep)
2076 panic("cannot create ip6_dst_cache");
2079 #ifdef CONFIG_PROC_FS
2080 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2082 p->owner = THIS_MODULE;
2084 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2091 void __exit ip6_route_cleanup(void)
2093 #ifdef CONFIG_PROC_FS
2094 proc_net_remove("ipv6_route");
2095 proc_net_remove("rt6_stats");
2102 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);