2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
53 #include <linux/rtnetlink.h>
57 #include <asm/uaccess.h>
60 #include <linux/sysctl.h>
/* Debug tracing macros.
 * NOTE(review): the #if/#else preprocessor guards that select between the
 * two RT6_TRACE definitions are missing from this extract; normally only
 * one of them is active depending on the debug level.
 */
63 /* Set to 3 to get tracing. */
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
71 #define RT6_TRACE(x...) do { ; } while (0)
/* Routing-cache garbage-collection and PMTU tunables, in jiffies where a
 * HZ factor appears.  ip6_rt_min_advmss reserves 20 bytes for the TCP
 * header and 40 for the IPv6 header below the minimum MTU.
 */
75 static int ip6_rt_max_size = 4096;
76 static int ip6_rt_gc_min_interval = HZ / 2;
77 static int ip6_rt_gc_timeout = 60*HZ;
78 int ip6_rt_gc_interval = 30*HZ;
79 static int ip6_rt_gc_elasticity = 9;
80 static int ip6_rt_mtu_expires = 10*60*HZ;
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
/* Forward declarations for the dst_ops callbacks and packet-drop handlers
 * defined later in this file.
 */
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void ip6_dst_destroy(struct dst_entry *);
87 static void ip6_dst_ifdown(struct dst_entry *, int how);
88 static int ip6_dst_gc(void);
90 static int ip6_pkt_discard(struct sk_buff *skb);
91 static int ip6_pkt_discard_out(struct sk_buff **pskb);
92 static void ip6_link_failure(struct sk_buff *skb);
93 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
/* dst_ops vtable for IPv6 route entries: the generic dst cache invokes
 * these hooks for validation, teardown, device removal and PMTU updates.
 * NOTE(review): some initializer lines (e.g. .family, .gc) appear to be
 * missing from this extract.
 */
95 static struct dst_ops ip6_dst_ops = {
97 .protocol = __constant_htons(ETH_P_IPV6),
100 .check = ip6_dst_check,
101 .destroy = ip6_dst_destroy,
102 .ifdown = ip6_dst_ifdown,
103 .negative_advice = ip6_negative_advice,
104 .link_failure = ip6_link_failure,
105 .update_pmtu = ip6_rt_update_pmtu,
106 .entry_size = sizeof(struct rt6_info),
/* Sentinel "no route" entry returned by lookups when nothing matches.
 * Input/output handlers discard packets and the error is -ENETUNREACH;
 * it is statically allocated and permanently referenced (refcnt 1),
 * with the worst possible metric so it never wins route selection.
 */
109 struct rt6_info ip6_null_entry = {
112 .__refcnt = ATOMIC_INIT(1),
114 .dev = &loopback_dev,
116 .error = -ENETUNREACH,
117 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
118 .input = ip6_pkt_discard,
119 .output = ip6_pkt_discard_out,
121 .path = (struct dst_entry*)&ip6_null_entry,
124 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
125 .rt6i_metric = ~(u32) 0,
126 .rt6i_ref = ATOMIC_INIT(1),
/* Root node of the IPv6 FIB tree; its initial (and fallback) leaf is the
 * null entry above.
 */
129 struct fib6_node ip6_routing_table = {
130 .leaf = &ip6_null_entry,
131 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
134 /* Protects all the ip6 fib */
136 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
139 /* allocate dst with ip6_dst_ops */
140 static __inline__ struct rt6_info *ip6_dst_alloc(void)
142 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
/* dst_ops.destroy hook: detaches the route from its inet6_dev.
 * NOTE(review): the lines that release the idev reference (presumably
 * in6_dev_put(idev)) are missing from this extract -- confirm against
 * the full source.
 */
145 static void ip6_dst_destroy(struct dst_entry *dst)
147 struct rt6_info *rt = (struct rt6_info *)dst;
148 struct inet6_dev *idev = rt->rt6i_idev;
151 rt->rt6i_idev = NULL;
/* dst_ops.ifdown hook: on device teardown, simply reuse the destroy path
 * to drop per-device state.
 */
156 static void ip6_dst_ifdown(struct dst_entry *dst, int how)
158 ip6_dst_destroy(dst);
/* Walk the leaf chain starting at 'rt' looking for a route whose device
 * matches the requested outgoing interface (oif); falls back to the null
 * entry when nothing usable is found.  Caller holds rt6_lock.
 * NOTE(review): the parameter list (oif, strict), the loop's match/return
 * logic and the handling of the 'local' loopback candidate are partially
 * missing from this extract.
 */
162 * Route lookup. Any rt6_lock is implied.
165 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
169 struct rt6_info *local = NULL;
170 struct rt6_info *sprt;
173 for (sprt = rt; sprt; sprt = sprt->u.next) {
174 struct net_device *dev = sprt->rt6i_dev;
175 if (dev->ifindex == oif)
177 if (dev->flags&IFF_LOOPBACK)
185 return &ip6_null_entry;
/* Round-robin state for default-router selection: remembers the last
 * default router chosen so it keeps being preferred while reachable.
 * Protected by rt6_dflt_lock; BH is disabled locally by users.
 */
191 * pointer to the last default router chosen. BH is disabled locally.
193 static struct rt6_info *rt6_dflt_pointer;
194 static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
/* Pick the best default router per RFC 2461 6.3.6: prefer the same
 * (probably reachable) router as last time; otherwise round-robin over
 * the default-router list, honouring the requested outgoing interface.
 * Falls back to any addrconf default route, then to the null entry.
 * NOTE(review): the NUD-state scoring switch body, several loop headers
 * and the final return are missing from this extract.
 */
196 /* Default Router Selection (RFC 2461 6.3.6) */
197 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
199 struct rt6_info *match = NULL;
200 struct rt6_info *sprt;
203 for (sprt = rt; sprt; sprt = sprt->u.next) {
204 struct neighbour *neigh;
209 sprt->rt6i_dev->ifindex == oif))
212 if (sprt == rt6_dflt_pointer)
/* Score each candidate by its neighbour cache (NUD) state under the
 * neighbour's lock; higher m wins.
 */
215 if ((neigh = sprt->rt6i_nexthop) != NULL) {
216 read_lock_bh(&neigh->lock);
217 switch (neigh->nud_state) {
235 read_unlock_bh(&neigh->lock);
238 read_unlock_bh(&neigh->lock);
243 if (m > mpri || m >= 12) {
247 /* we choose the last default router if it
248 * is in (probably) reachable state.
249 * If route changed, we should do pmtu
250 * discovery. --yoshfuji
/* No clearly reachable router: round-robin, starting after the
 * previously chosen one so routers are rotated fairly.
 */
257 spin_lock(&rt6_dflt_lock);
260 * No default routers are known to be reachable.
263 if (rt6_dflt_pointer) {
264 for (sprt = rt6_dflt_pointer->u.next;
265 sprt; sprt = sprt->u.next) {
266 if (sprt->u.dst.obsolete <= 0 &&
267 sprt->u.dst.error == 0) {
274 sprt = sprt->u.next) {
275 if (sprt->u.dst.obsolete <= 0 &&
276 sprt->u.dst.error == 0) {
280 if (sprt == rt6_dflt_pointer)
287 if (rt6_dflt_pointer != match)
288 RT6_TRACE("changed default router: %p->%p\n",
289 rt6_dflt_pointer, match);
290 rt6_dflt_pointer = match;
292 spin_unlock(&rt6_dflt_lock);
296 * Last Resort: if no default routers found,
297 * use addrconf default route.
298 * We don't record this route.
300 for (sprt = ip6_routing_table.leaf;
301 sprt; sprt = sprt->u.next) {
302 if ((sprt->rt6i_flags & RTF_DEFAULT) &&
305 sprt->rt6i_dev->ifindex == oif))) {
311 /* no default route. give up. */
312 match = &ip6_null_entry;
/* Public lookup: find the best route for (daddr, saddr, oif) under
 * rt6_lock, take a reference on it, and return it with lastuse stamped.
 * Returns NULL-or-error semantics handled by the (missing) tail: on
 * dst.error the reference is dropped.  NOTE(review): return statements
 * are missing from this extract.
 */
319 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
322 struct fib6_node *fn;
325 read_lock_bh(&rt6_lock);
326 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
327 rt = rt6_device_match(fn->leaf, oif, strict);
328 dst_hold(&rt->u.dst);
330 read_unlock_bh(&rt6_lock);
332 rt->u.dst.lastuse = jiffies;
333 if (rt->u.dst.error == 0)
335 dst_release(&rt->u.dst);
339 /* rt6_ins is called with FREE rt6_lock.
340 It takes new route entry, the addition fails by any reason the
341 route is freed. In any case, if caller does not hold it, it may
/* Insert a route into the FIB tree under the write lock; fib6_add owns
 * the entry on failure (see comment above).
 */
345 static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
349 write_lock_bh(&rt6_lock);
350 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
351 write_unlock_bh(&rt6_lock);
/* Clone-on-write: clone a non-cached route into a host (/128) RTF_CACHE
 * entry for this destination, resolve its neighbour, and insert it.
 * On failure returns a held ip6_null_entry (or an entry with dst.error
 * set) rather than NULL, so callers can always dereference the result.
 */
356 /* No rt6_lock! If COW failed, the function returns dead route entry
357 with dst->error set to errno value.
360 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
361 struct in6_addr *saddr)
370 rt = ip6_rt_copy(ort);
373 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
/* Connected (non-gateway) route: the destination itself is the nexthop. */
375 if (!(rt->rt6i_flags&RTF_GATEWAY))
376 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
378 rt->rt6i_dst.plen = 128;
379 rt->rt6i_flags |= RTF_CACHE;
380 rt->u.dst.flags |= DST_HOST;
382 #ifdef CONFIG_IPV6_SUBTREES
383 if (rt->rt6i_src.plen && saddr) {
384 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
385 rt->rt6i_src.plen = 128;
389 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
391 dst_hold(&rt->u.dst);
393 err = rt6_ins(rt, NULL, NULL);
397 rt->u.dst.error = err;
401 dst_hold(&ip6_null_entry.u.dst);
402 return &ip6_null_entry;
/* Backtrack helper used by the lookup paths: when the strict device
 * match failed, climb toward the root looking for a node that carries
 * route info, giving up (holding the null entry) at the root.
 */
405 #define BACKTRACK() \
406 if (rt == &ip6_null_entry && strict) { \
407 while ((fn = fn->parent) != NULL) { \
408 if (fn->fn_flags & RTN_ROOT) { \
409 dst_hold(&rt->u.dst); \
412 if (fn->fn_flags & RTN_RTINFO) \
/* Route an incoming packet: look up skb's destination in the FIB,
 * strict-match the input device for multicast/link-local destinations,
 * COW a cache entry when the route has no nexthop yet, and attach the
 * resulting dst to the skb.  NOTE(review): the relookup goto targets and
 * some braces are missing from this extract.
 */
418 void ip6_route_input(struct sk_buff *skb)
420 struct fib6_node *fn;
/* Multicast and link-local destinations must match the arrival device. */
425 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
428 read_lock_bh(&rt6_lock);
430 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
431 &skb->nh.ipv6h->saddr);
436 if ((rt->rt6i_flags & RTF_CACHE)) {
437 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
439 dst_hold(&rt->u.dst);
443 rt = rt6_device_match(rt, skb->dev->ifindex, 0);
/* No nexthop resolved yet and not NONEXTHOP: clone-on-write a host
 * cache entry.  rt6_lock must be dropped first because rt6_cow
 * inserts under the write lock.
 */
446 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
447 read_unlock_bh(&rt6_lock);
449 rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
450 &skb->nh.ipv6h->saddr);
452 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
454 /* Race condition! In the gap, when rt6_lock was
455 released someone could insert this route. Relookup.
457 dst_release(&rt->u.dst);
460 dst_hold(&rt->u.dst);
463 read_unlock_bh(&rt6_lock);
465 rt->u.dst.lastuse = jiffies;
467 skb->dst = (struct dst_entry *) rt;
/* Route an outgoing flow: same shape as ip6_route_input but keyed by the
 * flowi (fl6_dst/fl6_src/oif), with an extra branch that runs RFC 2461
 * default-router selection for default routes.  Returns a held dst.
 * NOTE(review): goto targets, braces and the final return are missing
 * from this extract.
 */
470 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
472 struct fib6_node *fn;
477 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
480 read_lock_bh(&rt6_lock);
482 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
487 if ((rt->rt6i_flags & RTF_CACHE)) {
488 rt = rt6_device_match(rt, fl->oif, strict);
490 dst_hold(&rt->u.dst);
/* Default routes at addrconf priority or worse go through best-router
 * selection; others just need a device match.
 */
493 if (rt->rt6i_flags & RTF_DEFAULT) {
494 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
495 rt = rt6_best_dflt(rt, fl->oif);
497 rt = rt6_device_match(rt, fl->oif, strict);
501 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
502 read_unlock_bh(&rt6_lock);
504 rt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
506 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
509 /* Race condition! In the gap, when rt6_lock was
510 released someone could insert this route. Relookup.
512 dst_release(&rt->u.dst);
515 dst_hold(&rt->u.dst);
518 read_unlock_bh(&rt6_lock);
520 rt->u.dst.lastuse = jiffies;
527 * Destination cache support functions
/* dst_ops.check: a cached route is still valid while its fib node's
 * serial number matches the cookie captured at lookup time.
 */
530 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
534 rt = (struct rt6_info *) dst;
536 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/* dst_ops.negative_advice: a cache clone that misbehaves is simply
 * deleted from the tree.
 */
543 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
545 struct rt6_info *rt = (struct rt6_info *) dst;
548 if (rt->rt6i_flags & RTF_CACHE)
549 ip6_del_rt(rt, NULL, NULL);
/* dst_ops.link_failure: report unreachability to the sender, then expire
 * the cache entry (or invalidate the default route's fib node serial so
 * sockets re-look-up).
 */
556 static void ip6_link_failure(struct sk_buff *skb)
560 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
562 rt = (struct rt6_info *) skb->dst;
564 if (rt->rt6i_flags&RTF_CACHE) {
565 dst_set_expires(&rt->u.dst, 0);
566 rt->rt6i_flags |= RTF_EXPIRES;
567 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
568 rt->rt6i_node->fn_sernum = -1;
/* dst_ops.update_pmtu: only host (/128) entries record a reduced PMTU,
 * clamped to IPV6_MIN_MTU (clamping lines missing from this extract).
 */
572 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
574 struct rt6_info *rt6 = (struct rt6_info*)dst;
576 if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
577 rt6->rt6i_flags |= RTF_MODIFIED;
578 if (mtu < IPV6_MIN_MTU)
580 dst->metrics[RTAX_MTU-1] = mtu;
584 /* Protected by rt6_lock. */
585 static struct dst_entry *ndisc_dst_gc_list;
586 static int ipv6_get_mtu(struct net_device *dev);
/* Derive an advertised MSS from an MTU: subtract IPv6+TCP headers, clamp
 * below to ip6_rt_min_advmss and above to the non-jumbo maximum (see the
 * comment at line 596 for why IPV6_MAXPLEN is the ceiling).
 */
588 static inline unsigned int ipv6_advmss(unsigned int mtu)
590 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
592 if (mtu < ip6_rt_min_advmss)
593 mtu = ip6_rt_min_advmss;
596 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
597 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
598 * IPV6_MAXPLEN is also valid and means: "any MSS,
599 * rely only on pmtu discovery"
601 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/* Build a standalone host dst for neighbour-discovery packets: not
 * inserted in the FIB, but chained on ndisc_dst_gc_list (under rt6_lock)
 * so ndisc_dst_gc() can reap it once its refcount drops.
 */
606 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
607 struct neighbour *neigh,
608 struct in6_addr *addr,
609 int (*output)(struct sk_buff **))
611 struct rt6_info *rt = ip6_dst_alloc();
613 if (unlikely(rt == NULL))
/* No neighbour supplied by the caller: resolve one from ndisc. */
620 neigh = ndisc_get_neigh(dev, addr);
623 rt->rt6i_idev = in6_dev_get(dev);
624 rt->rt6i_nexthop = neigh;
625 atomic_set(&rt->u.dst.__refcnt, 1);
626 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
627 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
628 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
629 rt->u.dst.output = output;
631 #if 0 /* there's no chance to use these for ndisc */
632 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
635 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
636 rt->rt6i_dst.plen = 128;
639 write_lock_bh(&rt6_lock);
640 rt->u.dst.next = ndisc_dst_gc_list;
641 ndisc_dst_gc_list = &rt->u.dst;
642 write_unlock_bh(&rt6_lock);
644 fib6_force_start_gc();
647 return (struct dst_entry *)rt;
/* Reap unreferenced entries from ndisc_dst_gc_list; *more counts the
 * survivors.  NOTE(review): the unlink/free body and the tail of the
 * function are missing from this extract.
 */
650 int ndisc_dst_gc(int *more)
652 struct dst_entry *dst, *next, **pprev;
656 pprev = &ndisc_dst_gc_list;
658 while ((dst = *pprev) != NULL) {
659 if (!atomic_read(&dst->__refcnt)) {
/* dst_ops.gc hook: rate-limit collection to ip6_rt_gc_min_interval
 * unless the cache exceeded ip6_rt_max_size, then run fib6 GC with an
 * adaptive 'expire' horizon that shrinks (by the elasticity shift) under
 * pressure and resets when the cache is comfortably below gc_thresh.
 */
672 static int ip6_dst_gc(void)
674 static unsigned expire = 30*HZ;
675 static unsigned long last_gc;
676 unsigned long now = jiffies;
678 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
679 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
685 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
686 expire = ip6_rt_gc_timeout>>1;
689 expire -= expire>>ip6_rt_gc_elasticity;
690 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
693 /* Clean host part of a prefix. Not necessary in radix tree,
694 but results in cleaner routing tables.
696 Remove it only when all the things will work!
/* Device MTU helper: prefer the per-device inet6 config (cnf.mtu6),
 * defaulting to IPV6_MIN_MTU when the device has no inet6_dev.
 */
699 static int ipv6_get_mtu(struct net_device *dev)
701 int mtu = IPV6_MIN_MTU;
702 struct inet6_dev *idev;
704 idev = in6_dev_get(dev);
706 mtu = idev->cnf.mtu6;
/* Hop-limit helper: per-device cnf.hop_limit, falling back to the global
 * ipv6_devconf default.
 */
712 static int ipv6_get_hoplimit(struct net_device *dev)
714 int hoplimit = ipv6_devconf.hop_limit;
715 struct inet6_dev *idev;
717 idev = in6_dev_get(dev);
719 hoplimit = idev->cnf.hop_limit;
/* Add a route described by an in6_rtmsg (ioctl or rtnetlink path):
 * validate prefix lengths, resolve the device, build and initialize a
 * new rt6_info (handlers, prefixes, gateway, nexthop, metrics), then
 * insert it via rt6_ins().  On any error the partially built entry is
 * freed at the (partially visible) 'out' path.
 * NOTE(review): many error-branch lines and braces are missing from this
 * extract.
 */
729 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
735 struct net_device *dev = NULL;
738 rta = (struct rtattr **) _rtattr;
740 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
742 #ifndef CONFIG_IPV6_SUBTREES
743 if (rtmsg->rtmsg_src_len)
746 if (rtmsg->rtmsg_ifindex) {
747 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
752 if (rtmsg->rtmsg_metric == 0)
753 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
755 rt = ip6_dst_alloc();
760 rt->u.dst.obsolete = -1;
761 rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
/* Netlink-originated adds carry the protocol in the rtmsg header;
 * ioctl adds default to RTPROT_BOOT.
 */
762 if (nlh && (r = NLMSG_DATA(nlh))) {
763 rt->rt6i_protocol = r->rtm_protocol;
765 rt->rt6i_protocol = RTPROT_BOOT;
768 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
770 if (addr_type & IPV6_ADDR_MULTICAST)
771 rt->u.dst.input = ip6_mc_input;
773 rt->u.dst.input = ip6_forward;
775 rt->u.dst.output = ip6_output;
777 ipv6_addr_prefix(&rt->rt6i_dst.addr,
778 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
779 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
780 if (rt->rt6i_dst.plen == 128)
781 rt->u.dst.flags = DST_HOST;
783 #ifdef CONFIG_IPV6_SUBTREES
784 ipv6_addr_prefix(&rt->rt6i_src.addr,
785 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
786 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
789 rt->rt6i_metric = rtmsg->rtmsg_metric;
791 /* We cannot add true routes via loopback here,
792 they would result in kernel looping; promote them to reject routes
794 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
795 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
800 rt->u.dst.output = ip6_pkt_discard_out;
801 rt->u.dst.input = ip6_pkt_discard;
802 rt->u.dst.error = -ENETUNREACH;
803 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
/* Gateway routes: the nexthop must normally be link-local unicast;
 * non-link-local gateways are only accepted when they resolve through
 * an existing non-gateway route on the right device (see ANK's comment
 * below about SIT/PtP/NBMA exceptions).
 */
807 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
808 struct in6_addr *gw_addr;
811 gw_addr = &rtmsg->rtmsg_gateway;
812 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
813 gwa_type = ipv6_addr_type(gw_addr);
815 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
816 struct rt6_info *grt;
818 /* IPv6 strictly inhibits using not link-local
819 addresses as nexthop address.
820 Otherwise, router will not able to send redirects.
821 It is very good, but in some (rare!) circumstances
822 (SIT, PtP, NBMA NOARP links) it is handy to allow
823 some exceptions. --ANK
826 if (!(gwa_type&IPV6_ADDR_UNICAST))
829 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
835 if (dev != grt->rt6i_dev) {
836 dst_release(&grt->u.dst);
843 if (!(grt->rt6i_flags&RTF_GATEWAY))
845 dst_release(&grt->u.dst);
851 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
/* Resolve/create the neighbour entry for gateway or NONEXTHOP routes. */
859 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
860 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
861 if (IS_ERR(rt->rt6i_nexthop)) {
862 err = PTR_ERR(rt->rt6i_nexthop);
863 rt->rt6i_nexthop = NULL;
868 rt->rt6i_flags = rtmsg->rtmsg_flags;
/* Copy caller-supplied RTA_METRICS attributes into dst.metrics. */
871 if (rta && rta[RTA_METRICS-1]) {
872 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
873 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
875 while (RTA_OK(attr, attrlen)) {
876 unsigned flavor = attr->rta_type;
878 if (flavor > RTAX_MAX) {
882 rt->u.dst.metrics[flavor-1] =
883 *(u32 *)RTA_DATA(attr);
885 attr = RTA_NEXT(attr, attrlen);
/* Fill in defaults for any metrics the caller did not set. */
889 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
890 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
891 rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
892 IPV6_DEFAULT_MCASTHOPS;
894 rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
895 ipv6_get_hoplimit(dev);
898 if (!rt->u.dst.metrics[RTAX_MTU-1])
899 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
900 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
901 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
903 rt->rt6i_idev = in6_dev_get(dev);
904 return rt6_ins(rt, nlh, _rtattr);
/* Error path: free the unfinished route entry. */
909 dst_free((struct dst_entry *) rt);
/* Remove a route from the FIB tree under the write lock.  The cached
 * default-router pointer is cleared first so rt6_best_dflt() cannot keep
 * referencing a deleted entry; the caller's reference is dropped before
 * fib6_del().
 */
913 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
917 write_lock_bh(&rt6_lock);
919 spin_lock_bh(&rt6_dflt_lock);
920 rt6_dflt_pointer = NULL;
921 spin_unlock_bh(&rt6_dflt_lock);
923 dst_release(&rt->u.dst);
925 err = fib6_del(rt, nlh, _rtattr);
926 write_unlock_bh(&rt6_lock);
/* Locate the route matching the in6_rtmsg (prefix, optional ifindex,
 * gateway and metric) and delete it.  The read lock is dropped before
 * ip6_del_rt(), which retakes the write lock, with a hold on the entry
 * bridging the gap.
 */
931 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
933 struct fib6_node *fn;
937 read_lock_bh(&rt6_lock);
939 fn = fib6_locate(&ip6_routing_table,
940 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
941 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
944 for (rt = fn->leaf; rt; rt = rt->u.next) {
945 if (rtmsg->rtmsg_ifindex &&
946 (rt->rt6i_dev == NULL ||
947 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
949 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
950 ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
952 if (rtmsg->rtmsg_metric &&
953 rtmsg->rtmsg_metric != rt->rt6i_metric)
955 dst_hold(&rt->u.dst);
956 read_unlock_bh(&rt6_lock);
958 return ip6_del_rt(rt, nlh, _rtattr);
961 read_unlock_bh(&rt6_lock);
/* Process an NDISC Redirect for 'dest' received from 'saddr' via 'neigh':
 * validate the redirect against the current route (device, gateway,
 * on-link rules), then install a host RTF_CACHE clone pointing at the
 * new nexthop and remove the stale cache entry.
 * NOTE(review): several error/exit branches and braces are missing from
 * this extract.
 */
969 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
970 struct neighbour *neigh, int on_link)
972 struct rt6_info *rt, *nrt;
974 /* Locate old route to this destination. */
975 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
980 if (neigh->dev != rt->rt6i_dev)
983 /* Redirect received -> path was valid.
984 Look, redirects are sent only in response to data packets,
985 so that this nexthop apparently is reachable. --ANK
987 dst_confirm(&rt->u.dst);
989 /* Duplicate redirect: silently ignore. */
990 if (neigh == rt->u.dst.neighbour)
993 /* Current route is on-link; redirect is always invalid.
995 Seems, previous statement is not true. It could
996 be node, which looks for us as on-link (f.e. proxy ndisc)
997 But then router serving it might decide, that we should
998 know truth 8)8) --ANK (980726).
1000 if (!(rt->rt6i_flags&RTF_GATEWAY))
1004 * RFC 2461 specifies that redirects should only be
1005 * accepted if they come from the nexthop to the target.
1006 * Due to the way default routers are chosen, this notion
1007 * is a bit fuzzy and one might need to check all default
/* Sender is not our recorded gateway: for a default route, accept if
 * the sender matches ANY default router's gateway; otherwise log and
 * drop the redirect.
 */
1011 if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
1012 if (rt->rt6i_flags & RTF_DEFAULT) {
1013 struct rt6_info *rt1;
1015 read_lock(&rt6_lock);
1016 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1017 if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
1018 dst_hold(&rt1->u.dst);
1019 dst_release(&rt->u.dst);
1020 read_unlock(&rt6_lock);
1025 read_unlock(&rt6_lock);
1027 if (net_ratelimit())
1028 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1029 "for redirect target\n");
1036 * We have finally decided to accept it.
/* Build the replacement cache entry toward the redirect's nexthop. */
1039 nrt = ip6_rt_copy(rt);
1043 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1045 nrt->rt6i_flags &= ~RTF_GATEWAY;
1047 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1048 nrt->rt6i_dst.plen = 128;
1049 nrt->u.dst.flags |= DST_HOST;
1051 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1052 nrt->rt6i_nexthop = neigh_clone(neigh);
1053 /* Reset pmtu, it may be better */
1054 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1055 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
1057 if (rt6_ins(nrt, NULL, NULL))
1060 if (rt->rt6i_flags&RTF_CACHE) {
1061 ip6_del_rt(rt, NULL, NULL);
1066 dst_release(&rt->u.dst);
1071 * Handle ICMP "packet too big" messages
1072 * i.e. Path MTU discovery
/* Record a reduced Path MTU learned from an ICMPv6 Packet Too Big:
 * clamp bogus values up to IPV6_MIN_MTU (RFC 1981), find the route,
 * and either update an existing cache entry in place or create an
 * expiring host clone carrying the new MTU.
 */
1075 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1076 struct net_device *dev, u32 pmtu)
1078 struct rt6_info *rt, *nrt;
1080 if (pmtu < IPV6_MIN_MTU) {
1081 if (net_ratelimit())
1082 printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1084 /* According to RFC1981, the PMTU is set to the IPv6 minimum
1085 link MTU if the node receives a Packet Too Big message
1086 reporting next-hop MTU that is less than the IPv6 minimum MTU.
1088 pmtu = IPV6_MIN_MTU;
1091 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
/* Ignore reports that would not actually shrink the path MTU. */
1096 if (pmtu >= dst_pmtu(&rt->u.dst))
1099 /* New mtu received -> path was valid.
1100 They are sent only in response to data packets,
1101 so that this nexthop apparently is reachable. --ANK
1103 dst_confirm(&rt->u.dst);
1105 /* Host route. If it is static, it would be better
1106 not to override it, but add new one, so that
1107 when cache entry will expire old pmtu
1108 would return automatically.
1110 if (rt->rt6i_flags & RTF_CACHE) {
1111 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1112 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1113 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1118 Two cases are possible:
1119 1. It is connected route. Action: COW
1120 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1122 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1123 nrt = rt6_cow(rt, daddr, saddr);
1124 if (!nrt->u.dst.error) {
1125 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1126 /* According to RFC 1981, detecting PMTU increase shouldn't be
1127 happened within 5 mins, the recommended timer is 10 mins.
1128 Here this route expiration time is set to ip6_rt_mtu_expires
1129 which is 10 mins. After 10 mins the decreased pmtu is expired
1130 and detecting PMTU increase will be automatically happened.
1132 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1133 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1135 dst_release(&nrt->u.dst);
/* Gatewayed/NONEXTHOP case: plain clone reusing the same nexthop. */
1137 nrt = ip6_rt_copy(rt);
1140 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1141 nrt->rt6i_dst.plen = 128;
1142 nrt->u.dst.flags |= DST_HOST;
1143 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1144 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1145 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1146 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1147 rt6_ins(nrt, NULL, NULL);
1151 dst_release(&rt->u.dst);
1155 * Misc support functions
/* Shallow-copy an existing route into a fresh rt6_info: handlers,
 * metrics, device (with references taken), gateway and destination key.
 * The copy starts with metric 0, no expiry, and RTF_EXPIRES cleared --
 * callers (rt6_cow, rt6_redirect, rt6_pmtu_discovery) then turn it into
 * a cache clone.
 */
1158 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1160 struct rt6_info *rt = ip6_dst_alloc();
1163 rt->u.dst.input = ort->u.dst.input;
1164 rt->u.dst.output = ort->u.dst.output;
1166 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1167 rt->u.dst.dev = ort->u.dst.dev;
1169 dev_hold(rt->u.dst.dev);
1170 rt->rt6i_idev = ort->rt6i_idev;
1172 in6_dev_hold(rt->rt6i_idev);
1173 rt->u.dst.lastuse = jiffies;
1174 rt->rt6i_expires = 0;
1176 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1177 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1178 rt->rt6i_metric = 0;
1180 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1181 #ifdef CONFIG_IPV6_SUBTREES
1182 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
/* Find the default-router entry for (gateway address, device) in the
 * root node's leaf chain, returning it held (or NULL -- tail missing
 * from this extract).
 */
1188 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1190 struct rt6_info *rt;
1191 struct fib6_node *fn;
1193 fn = &ip6_routing_table;
1195 write_lock_bh(&rt6_lock);
1196 for (rt = fn->leaf; rt; rt=rt->u.next) {
1197 if (dev == rt->rt6i_dev &&
1198 ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1202 dst_hold(&rt->u.dst);
1203 write_unlock_bh(&rt6_lock);
/* Install an RA-learned default route (metric 1024, ADDRCONF+DEFAULT
 * flags) via ip6_route_add, then look it up so the caller gets the
 * inserted entry.
 */
1207 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1208 struct net_device *dev)
1210 struct in6_rtmsg rtmsg;
1212 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1213 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1214 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1215 rtmsg.rtmsg_metric = 1024;
1216 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1218 rtmsg.rtmsg_ifindex = dev->ifindex;
1220 ip6_route_add(&rtmsg, NULL, NULL);
1221 return rt6_get_dflt_router(gwaddr, dev);
/* Delete all default routes of the given class (RTF_ALLONLINK for the
 * last-resort case, else addrconf defaults).  Each deletion drops the
 * read lock, calls ip6_del_rt, and restarts the scan (restart lines
 * missing from this extract).
 */
1224 void rt6_purge_dflt_routers(int last_resort)
1226 struct rt6_info *rt;
1230 flags = RTF_ALLONLINK;
1232 flags = RTF_DEFAULT | RTF_ADDRCONF;
1235 read_lock_bh(&rt6_lock);
1236 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1237 if (rt->rt6i_flags & flags) {
1238 dst_hold(&rt->u.dst);
1240 spin_lock_bh(&rt6_dflt_lock);
1241 rt6_dflt_pointer = NULL;
1242 spin_unlock_bh(&rt6_dflt_lock);
1244 read_unlock_bh(&rt6_lock);
1246 ip6_del_rt(rt, NULL, NULL);
1251 read_unlock_bh(&rt6_lock);
/* SIOCADDRT/SIOCDELRT ioctl entry point: requires CAP_NET_ADMIN, copies
 * the in6_rtmsg from userspace, and dispatches to ip6_route_add /
 * ip6_route_del.  NOTE(review): the rtnl locking and default-case lines
 * are missing from this extract.
 */
1254 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1256 struct in6_rtmsg rtmsg;
1260 case SIOCADDRT: /* Add a route */
1261 case SIOCDELRT: /* Delete a route */
1262 if (!capable(CAP_NET_ADMIN))
1264 err = copy_from_user(&rtmsg, arg,
1265 sizeof(struct in6_rtmsg));
1272 err = ip6_route_add(&rtmsg, NULL, NULL);
1275 err = ip6_route_del(&rtmsg, NULL, NULL);
1289 * Drop the packet on the floor
/* Packet handlers for reject/null routes: count the no-route event,
 * send Destination Unreachable back, and drop.  The output variant just
 * unwraps the skb pointer and reuses the input path.
 */
1292 int ip6_pkt_discard(struct sk_buff *skb)
1294 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1295 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1300 int ip6_pkt_discard_out(struct sk_buff **pskb)
1302 return ip6_pkt_discard(*pskb);
/* Install the host route for a local (or anycast) address: a /128 route
 * through the loopback device with RTF_UP|RTF_NONEXTHOP (plus RTF_LOCAL
 * for non-anycast -- the anycast branch lines are missing from this
 * extract), then insert it into the FIB.
 */
1309 int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast)
1311 struct rt6_info *rt = ip6_dst_alloc();
1316 dev_hold(&loopback_dev);
1318 rt->u.dst.flags = DST_HOST;
1319 rt->u.dst.input = ip6_input;
1320 rt->u.dst.output = ip6_output;
1321 rt->rt6i_dev = &loopback_dev;
1322 rt->rt6i_idev = in6_dev_get(&loopback_dev);
1323 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1324 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
1325 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
1326 rt->u.dst.obsolete = -1;
1328 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1330 rt->rt6i_flags |= RTF_LOCAL;
1331 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1332 if (rt->rt6i_nexthop == NULL) {
1333 dst_free((struct dst_entry *) rt);
1337 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1338 rt->rt6i_dst.plen = 128;
1339 rt6_ins(rt, NULL, NULL);
1344 /* Delete address. Warning: you should check that this address
1345 disappeared before calling this function.
/* Remove the loopback host route for a local address: only a /128 match
 * is deleted; otherwise just the lookup reference is released.
 */
1348 int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
1350 struct rt6_info *rt;
1353 rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
1355 if (rt->rt6i_dst.plen == 128)
1356 err = ip6_del_rt(rt, NULL, NULL);
1358 dst_release(&rt->u.dst);
/* fib6_clean_tree callback for device removal: matches every route on
 * the dying device (or all devices when arg is NULL), never the null
 * entry.  NOTE(review): the return values directing deletion are missing
 * from this extract.
 */
1364 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1366 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1367 rt != &ip6_null_entry) {
1368 RT6_TRACE("deleted by ifdown %p\n", rt);
/* Purge all routes through 'dev' from the FIB under the write lock. */
1374 void rt6_ifdown(struct net_device *dev)
1376 write_lock_bh(&rt6_lock);
1377 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1378 write_unlock_bh(&rt6_lock);
/* Argument bundle for the MTU-change tree walk below. */
1381 struct rt6_mtu_change_arg
1383 struct net_device *dev;
/* fib6_clean_tree callback for a device MTU change: update RTAX_MTU on
 * routes over this device per the PMTU rules spelled out in the comments
 * below, and recompute the advertised MSS.
 */
1387 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1389 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1390 struct inet6_dev *idev;
1392 /* In IPv6 pmtu discovery is not optional,
1393 so that RTAX_MTU lock cannot disable it.
1394 We still use this lock to block changes
1395 caused by addrconf/ndisc.
1398 idev = __in6_dev_get(arg->dev);
1402 /* For administrative MTU increase, there is no way to discover
1403 IPv6 PMTU increase, so PMTU increase should be updated here.
1404 Since RFC 1981 doesn't include administrative MTU increase
1405 update PMTU increase is a MUST. (i.e. jumbo frame)
1408 If new MTU is less than route PMTU, this new MTU will be the
1409 lowest MTU in the path, update the route PMTU to reflect PMTU
1410 decreases; if new MTU is greater than route PMTU, and the
1411 old MTU is the lowest MTU in the path, update the route PMTU
1412 to reflect the increase. In this case if the other nodes' MTU
1413 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1416 if (rt->rt6i_dev == arg->dev &&
1417 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1418 (dst_pmtu(&rt->u.dst) > arg->mtu ||
1419 (dst_pmtu(&rt->u.dst) < arg->mtu &&
1420 dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
1421 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1422 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
/* Walk the whole FIB applying the MTU-change callback for 'dev'. */
1426 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1428 struct rt6_mtu_change_arg arg;
1432 read_lock_bh(&rt6_lock);
1433 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1434 read_unlock_bh(&rt6_lock);
/* Translate an rtnetlink (struct rtmsg + attribute array) request into
 * the legacy in6_rtmsg format consumed by ip6_route_add/del, validating
 * each attribute's length before copying.
 */
1437 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1438 struct in6_rtmsg *rtmsg)
1440 memset(rtmsg, 0, sizeof(*rtmsg));
1442 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1443 rtmsg->rtmsg_src_len = r->rtm_src_len;
1444 rtmsg->rtmsg_flags = RTF_UP;
1445 if (r->rtm_type == RTN_UNREACHABLE)
1446 rtmsg->rtmsg_flags |= RTF_REJECT;
1448 if (rta[RTA_GATEWAY-1]) {
1449 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1451 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1452 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1454 if (rta[RTA_DST-1]) {
1455 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1457 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1459 if (rta[RTA_SRC-1]) {
1460 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1462 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1464 if (rta[RTA_OIF-1]) {
1465 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1467 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1469 if (rta[RTA_PRIORITY-1]) {
1470 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1472 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
/* rtnetlink RTM_DELROUTE handler: convert then delete. */
1477 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1479 struct rtmsg *r = NLMSG_DATA(nlh);
1480 struct in6_rtmsg rtmsg;
1482 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1484 return ip6_route_del(&rtmsg, nlh, arg);
/* rtnetlink RTM_NEWROUTE handler: convert then add. */
1487 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1489 struct rtmsg *r = NLMSG_DATA(nlh);
1490 struct in6_rtmsg rtmsg;
1492 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1494 return ip6_route_add(&rtmsg, nlh, arg);
/* Per-dump state passed through fib6 tree walks to rt6_dump_route. */
1497 struct rt6_rtnl_dump_arg
1499 struct sk_buff *skb;
1500 struct netlink_callback *cb;
/* Serialize one route into a netlink RTM message: rtmsg header, address
 * attributes (dst/src/iif/prefsrc), metrics, gateway, oif, priority and
 * cacheinfo.  On overflow, NLMSG_PUT/RTA_PUT jump to the failure label
 * which trims the skb back to its pre-message length.
 */
1503 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1504 struct in6_addr *dst,
1505 struct in6_addr *src,
1507 int type, u32 pid, u32 seq,
1508 struct nlmsghdr *in_nlh, int prefix)
1511 struct nlmsghdr *nlh;
1512 unsigned char *b = skb->tail;
1513 struct rta_cacheinfo ci;
1515 if (prefix) { /* user wants prefix routes only */
1516 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1517 /* success since this is not a prefix route */
/* Inherit the requester's pid from the triggering nlmsg if unset. */
1522 if (!pid && in_nlh) {
1523 pid = in_nlh->nlmsg_pid;
1526 nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1527 rtm = NLMSG_DATA(nlh);
1528 rtm->rtm_family = AF_INET6;
1529 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1530 rtm->rtm_src_len = rt->rt6i_src.plen;
1532 rtm->rtm_table = RT_TABLE_MAIN;
1533 if (rt->rt6i_flags&RTF_REJECT)
1534 rtm->rtm_type = RTN_UNREACHABLE;
1535 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1536 rtm->rtm_type = RTN_LOCAL;
1538 rtm->rtm_type = RTN_UNICAST;
1540 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1541 rtm->rtm_protocol = rt->rt6i_protocol;
1542 if (rt->rt6i_flags&RTF_DYNAMIC)
1543 rtm->rtm_protocol = RTPROT_REDIRECT;
1544 else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1545 rtm->rtm_protocol = RTPROT_KERNEL;
1546 else if (rt->rt6i_flags&RTF_DEFAULT)
1547 rtm->rtm_protocol = RTPROT_RA;
1549 if (rt->rt6i_flags&RTF_CACHE)
1550 rtm->rtm_flags |= RTM_F_CLONED;
/* A caller-supplied dst/src (route-get) overrides the stored prefix. */
1553 RTA_PUT(skb, RTA_DST, 16, dst);
1554 rtm->rtm_dst_len = 128;
1555 } else if (rtm->rtm_dst_len)
1556 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1557 #ifdef CONFIG_IPV6_SUBTREES
1559 RTA_PUT(skb, RTA_SRC, 16, src);
1560 rtm->rtm_src_len = 128;
1561 } else if (rtm->rtm_src_len)
1562 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1565 RTA_PUT(skb, RTA_IIF, 4, &iif);
1567 struct in6_addr saddr_buf;
1568 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1569 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1571 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1572 goto rtattr_failure;
1573 if (rt->u.dst.neighbour)
1574 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1576 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1577 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1578 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1579 if (rt->rt6i_expires)
1580 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1583 ci.rta_used = rt->u.dst.__use;
1584 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1585 ci.rta_error = rt->u.dst.error;
1589 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1590 nlh->nlmsg_len = skb->tail - b;
/* nlmsg/rtattr overflow: undo the partial message. */
1595 skb_trim(skb, b - skb->data);
/*
 * fib6 walker callback for route dumps: emit one route into the dump skb.
 * Honors the RTM_F_PREFIX request flag (prefix routes only) when the
 * request header is large enough to carry a struct rtmsg.
 */
1599 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1601 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1604 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1605 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1606 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
/* Negative return from rt6_fill_node (frame full) suspends the walk. */
1610 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1611 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
/*
 * Walker node visitor: dump every route sharing this fib6 leaf.
 * When the skb fills up mid-leaf, the walk is suspended so it can be
 * resumed from the same point on the next recvmsg.
 */
1615 static int fib6_dump_node(struct fib6_walker_t *w)
1618 struct rt6_info *rt;
/* Routes hanging off one leaf are chained via u.next. */
1620 for (rt = w->leaf; rt; rt = rt->u.next) {
1621 res = rt6_dump_route(rt, w->args);
1623 /* Frame is full, suspend walking */
/*
 * Tear down dump state: unlink the walker from the tree and restore the
 * original ->done callback that was stashed in cb->args[1] by
 * inet6_dump_fib.
 */
1633 static void fib6_dump_end(struct netlink_callback *cb)
1635 struct fib6_walker_t *w = (void*)cb->args[0];
1639 fib6_walker_unlink(w);
1643 cb->done = (void*)cb->args[1];
/*
 * Destructor hooked into cb->done: cleans up (via fib6_dump_end,
 * presumably — the call is outside this extract) and then chains to the
 * saved original done callback.
 */
1648 static int fib6_dump_done(struct netlink_callback *cb)
1651 return cb->done(cb);
/*
 * Entry point for RTM_GETROUTE dumps (ip -6 route show).  First call
 * allocates and registers a fib6 walker (stored in cb->args[0]); later
 * calls resume it with fib6_walk_continue until the tree is exhausted.
 * Returns skb->len while more data is pending, <=0 when done/failed.
 * NOTE(review): extract is truncated — allocation-failure and
 * completion branches are only partially visible.
 */
1654 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1656 struct rt6_rtnl_dump_arg arg;
1657 struct fib6_walker_t *w;
1663 w = (void*)cb->args[0];
1667 * 1. hook callback destructor.
/* Save the original done callback so fib6_dump_end can restore it. */
1669 cb->args[1] = (long)cb->done;
1670 cb->done = fib6_dump_done;
1673 * 2. allocate and initialize walker.
1675 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1678 RT6_TRACE("dump<%p", w);
1679 memset(w, 0, sizeof(*w));
1680 w->root = &ip6_routing_table;
1681 w->func = fib6_dump_node;
1683 cb->args[0] = (long)w;
/* Walk (or resume walking) the tree under the routing read lock. */
1684 read_lock_bh(&rt6_lock);
1686 read_unlock_bh(&rt6_lock);
1689 read_lock_bh(&rt6_lock);
1690 res = fib6_walk_continue(w);
1691 read_unlock_bh(&rt6_lock);
1694 if (res <= 0 && skb->len == 0)
1695 RT6_TRACE("%p>dump end\n", w);
1697 res = res < 0 ? res : skb->len;
1698 /* res < 0 is an error. (really, impossible)
1699 res == 0 means that dump is complete, but skb still can contain data.
1700 res > 0 dump is not complete, but frame is full.
1702 /* Destroy walker, if dump of this table is complete. */
/*
 * RTM_GETROUTE (single route query): build a flow from the request's
 * RTA_SRC/RTA_DST/RTA_IIF/RTA_OIF attributes, resolve it through the
 * routing engine, serialize the result with rt6_fill_node and unicast
 * the reply back to the requester.
 * NOTE(review): extract is truncated — several error returns and the
 * input-interface validation branch are only partially visible.
 */
1708 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1710 struct rtattr **rta = arg;
1713 struct sk_buff *skb;
1715 struct rt6_info *rt;
1717 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1721 /* Reserve room for dummy headers, this skb can pass
1722 through good chunk of routing engine.
1724 skb->mac.raw = skb->data;
1725 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
/* Populate the flow key from the request attributes. */
1727 memset(&fl, 0, sizeof(fl));
1729 ipv6_addr_copy(&fl.fl6_src,
1730 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1732 ipv6_addr_copy(&fl.fl6_dst,
1733 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1736 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
/* An input interface was named: it must exist. */
1739 struct net_device *dev;
1740 dev = __dev_get_by_index(iif);
1749 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
/* Resolve the route and attach it to the reply skb as its dst. */
1751 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1753 skb->dst = &rt->u.dst;
1755 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1756 err = rt6_fill_node(skb, rt,
1757 &fl.fl6_dst, &fl.fl6_src,
1759 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1760 nlh->nlmsg_seq, nlh, 0);
/* Send the reply to the requesting socket. */
1766 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
/*
 * Broadcast a route change (RTM_NEWROUTE/RTM_DELROUTE) to the
 * RTMGRP_IPV6_ROUTE multicast group.  Failures (no memory, message too
 * big) are reported to listeners via netlink_set_err rather than to the
 * caller — this is a best-effort notification.
 */
1776 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1778 struct sk_buff *skb;
/* 256 bytes of attribute headroom beyond the rtmsg header. */
1779 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
/* gfp_any(): GFP_ATOMIC in interrupt context, GFP_KERNEL otherwise. */
1781 skb = alloc_skb(size, gfp_any());
1783 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1786 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1788 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1791 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1792 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1799 #ifdef CONFIG_PROC_FS
1801 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/*
 * /proc/net/ipv6_route formatter, called per route by fib6_clean_tree.
 * Emits one fixed-width (RT6_INFO_LEN) line: dst/plen, src/plen,
 * next-hop, metric, refcnt, use count, flags, device name.
 * Uses arg->skip/offset to resume partial reads at line granularity.
 * NOTE(review): extract is truncated — the len-accumulation lines after
 * the hex loops are not all visible.
 */
1812 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1814 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
/* Skip whole lines that precede the requested file offset. */
1817 if (arg->skip < arg->offset / RT6_INFO_LEN) {
/* Stop once the caller's buffer is full. */
1822 if (arg->len >= arg->length)
/* Destination address as 32 hex digits. */
1825 for (i=0; i<16; i++) {
1826 sprintf(arg->buffer + arg->len, "%02x",
1827 rt->rt6i_dst.addr.s6_addr[i]);
1830 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1833 #ifdef CONFIG_IPV6_SUBTREES
/* Source prefix only exists when subtrees are compiled in. */
1834 for (i=0; i<16; i++) {
1835 sprintf(arg->buffer + arg->len, "%02x",
1836 rt->rt6i_src.addr.s6_addr[i]);
1839 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
/* Without subtrees: print an all-zero source column. */
1842 sprintf(arg->buffer + arg->len,
1843 "00000000000000000000000000000000 00 ");
/* Next hop: neighbour's primary key, or all zeroes when none. */
1847 if (rt->rt6i_nexthop) {
1848 for (i=0; i<16; i++) {
1849 sprintf(arg->buffer + arg->len, "%02x",
1850 rt->rt6i_nexthop->primary_key[i]);
1854 sprintf(arg->buffer + arg->len,
1855 "00000000000000000000000000000000");
1858 arg->len += sprintf(arg->buffer + arg->len,
1859 " %08x %08x %08x %08x %8s\n",
1860 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1861 rt->u.dst.__use, rt->rt6i_flags,
1862 rt->rt6i_dev ? rt->rt6i_dev->name : "");
/*
 * Legacy procfs read handler for /proc/net/ipv6_route: walks the whole
 * routing tree under the read lock, formatting each route via
 * rt6_info_route, then adjusts start/len for the sub-line remainder of
 * the requested offset (lines are fixed RT6_INFO_LEN bytes).
 */
1866 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1868 struct rt6_proc_arg arg;
1869 arg.buffer = buffer;
1870 arg.offset = offset;
1871 arg.length = length;
1875 read_lock_bh(&rt6_lock);
1876 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1877 read_unlock_bh(&rt6_lock);
/* Skipping was done per whole line; trim the intra-line remainder. */
1881 *start += offset % RT6_INFO_LEN;
1883 arg.len -= offset % RT6_INFO_LEN;
1885 if (arg.len > length)
1893 extern struct rt6_statistics rt6_stats;
/*
 * seq_file show callback for /proc/net/rt6_stats: one line of seven
 * hex-formatted counters (fib nodes, route nodes, allocations, entries,
 * cached routes, live dst entries, discarded routes).
 */
1895 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1897 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1898 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1899 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1900 rt6_stats.fib_rt_cache,
1901 atomic_read(&ip6_dst_ops.entries),
1902 rt6_stats.fib_discarded_routes);
/* Open /proc/net/rt6_stats as a single-record seq_file. */
1907 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1909 return single_open(file, rt6_stats_seq_show, NULL);
/* File operations for /proc/net/rt6_stats (standard single_open glue). */
1912 static struct file_operations rt6_stats_seq_fops = {
1913 .owner = THIS_MODULE,
1914 .open = rt6_stats_seq_open,
1916 .llseek = seq_lseek,
1917 .release = single_release,
1919 #endif /* CONFIG_PROC_FS */
1921 #ifdef CONFIG_SYSCTL
1923 static int flush_delay;
/*
 * sysctl handler for net.ipv6.route.flush: writing a value runs the
 * routing-cache garbage collector with that delay (negative values are
 * rejected — the branch body is outside this extract).
 */
1926 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1927 void __user *buffer, size_t *lenp, loff_t *ppos)
/* Parse the written integer into flush_delay. */
1930 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1931 if (flush_delay < 0)
1933 fib6_run_gc((unsigned long)flush_delay);
/*
 * sysctl table for net.ipv6.route.*: tunables controlling the routing
 * cache and garbage collector.  Interval/timeout entries are stored in
 * jiffies and converted via proc_dointvec_jiffies / sysctl_jiffies.
 */
1939 ctl_table ipv6_route_table[] = {
/* Write-only trigger: flush the routing cache (see handler above). */
1941 .ctl_name = NET_IPV6_ROUTE_FLUSH,
1942 .procname = "flush",
1943 .data = &flush_delay,
1944 .maxlen = sizeof(int),
1946 .proc_handler = &ipv6_sysctl_rtcache_flush
/* GC starts when the dst-entry count exceeds this threshold. */
1949 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
1950 .procname = "gc_thresh",
1951 .data = &ip6_dst_ops.gc_thresh,
1952 .maxlen = sizeof(int),
1954 .proc_handler = &proc_dointvec,
1957 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
1958 .procname = "max_size",
1959 .data = &ip6_rt_max_size,
1960 .maxlen = sizeof(int),
1962 .proc_handler = &proc_dointvec,
1965 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
1966 .procname = "gc_min_interval",
1967 .data = &ip6_rt_gc_min_interval,
1968 .maxlen = sizeof(int),
1970 .proc_handler = &proc_dointvec_jiffies,
1971 .strategy = &sysctl_jiffies,
1974 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
1975 .procname = "gc_timeout",
1976 .data = &ip6_rt_gc_timeout,
1977 .maxlen = sizeof(int),
1979 .proc_handler = &proc_dointvec_jiffies,
1980 .strategy = &sysctl_jiffies,
1983 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
1984 .procname = "gc_interval",
1985 .data = &ip6_rt_gc_interval,
1986 .maxlen = sizeof(int),
1988 .proc_handler = &proc_dointvec_jiffies,
1989 .strategy = &sysctl_jiffies,
1992 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
1993 .procname = "gc_elasticity",
1994 .data = &ip6_rt_gc_elasticity,
1995 .maxlen = sizeof(int),
1997 .proc_handler = &proc_dointvec_jiffies,
1998 .strategy = &sysctl_jiffies,
2001 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2002 .procname = "mtu_expires",
2003 .data = &ip6_rt_mtu_expires,
2004 .maxlen = sizeof(int),
2006 .proc_handler = &proc_dointvec_jiffies,
2007 .strategy = &sysctl_jiffies,
2010 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2011 .procname = "min_adv_mss",
2012 .data = &ip6_rt_min_advmss,
2013 .maxlen = sizeof(int),
2015 .proc_handler = &proc_dointvec_jiffies,
2016 .strategy = &sysctl_jiffies,
/*
 * Boot-time initialization of the IPv6 routing subsystem: create the
 * rt6_info slab cache (fatal if it cannot be created) and register the
 * /proc/net entries.
 */
2023 void __init ip6_route_init(void)
2025 struct proc_dir_entry *p;
2027 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2028 sizeof(struct rt6_info),
2029 0, SLAB_HWCACHE_ALIGN,
2031 if (!ip6_dst_ops.kmem_cachep)
2032 panic("cannot create ip6_dst_cache");
2035 #ifdef CONFIG_PROC_FS
2036 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2038 p->owner = THIS_MODULE;
2040 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
/*
 * Module-exit teardown: remove the /proc/net entries and destroy the
 * rt6_info slab cache created by ip6_route_init.
 */
2049 void __exit ip6_route_cleanup(void)
2050 #ifdef CONFIG_PROC_FS
2051 proc_net_remove("ipv6_route");
2052 proc_net_remove("rt6_stats");
2058 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);