2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
53 #include <linux/rtnetlink.h>
57 #include <asm/uaccess.h>
60 #include <linux/sysctl.h>
63 /* Set to 3 to get tracing. */
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
71 #define RT6_TRACE(x...) do { ; } while (0)
/* Routing-cache tunables. Intervals/timeouts are in jiffies (HZ-based).
 * ip6_rt_gc_interval is deliberately non-static: it is referenced from
 * outside this file. All others are file-local.
 * ip6_rt_min_advmss: IPV6_MIN_MTU minus 20 (TCP header) and 40 (IPv6 header).
 */
75 static int ip6_rt_max_size = 4096;
76 static int ip6_rt_gc_min_interval = HZ / 2;
77 static int ip6_rt_gc_timeout = 60*HZ;
78 int ip6_rt_gc_interval = 30*HZ;
79 static int ip6_rt_gc_elasticity = 9;
80 static int ip6_rt_mtu_expires = 10*60*HZ;
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void ip6_dst_destroy(struct dst_entry *);
87 static void ip6_dst_ifdown(struct dst_entry *, int how);
88 static int ip6_dst_gc(void);
90 static int ip6_pkt_discard(struct sk_buff *skb);
91 static int ip6_pkt_discard_out(struct sk_buff **pskb);
92 static void ip6_link_failure(struct sk_buff *skb);
93 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
/* dst_ops vtable wiring the generic dst cache to the IPv6 handlers
 * defined in this file (check/destroy/ifdown/negative_advice/
 * link_failure/update_pmtu). NOTE(review): some initializer lines
 * (e.g. .family, .gc) are elided in this extract.
 */
95 static struct dst_ops ip6_dst_ops = {
97 .protocol = __constant_htons(ETH_P_IPV6),
100 .check = ip6_dst_check,
101 .destroy = ip6_dst_destroy,
102 .ifdown = ip6_dst_ifdown,
103 .negative_advice = ip6_negative_advice,
104 .link_failure = ip6_link_failure,
105 .update_pmtu = ip6_rt_update_pmtu,
106 .entry_size = sizeof(struct rt6_info),
/* The permanent "no route" entry: a reject route on the loopback device
 * with error -ENETUNREACH, returned by lookups that find nothing. Both
 * input and output paths discard packets routed to it. Refcounts start
 * at 1 so it is never freed; rt6i_metric is the worst possible metric.
 */
109 struct rt6_info ip6_null_entry = {
112 .__refcnt = ATOMIC_INIT(1),
114 .dev = &loopback_dev,
116 .error = -ENETUNREACH,
117 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
118 .input = ip6_pkt_discard,
119 .output = ip6_pkt_discard_out,
121 .path = (struct dst_entry*)&ip6_null_entry,
124 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
125 .rt6i_metric = ~(u32) 0,
126 .rt6i_ref = ATOMIC_INIT(1),
/* Root of the IPv6 FIB radix tree. Its leaf points at ip6_null_entry
 * so an empty table still resolves to the reject route.
 */
129 struct fib6_node ip6_routing_table = {
130 .leaf = &ip6_null_entry,
131 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
134 /* Protects all the ip6 fib */
136 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
139 /* allocate dst with ip6_dst_ops */
/* Allocate a zeroed rt6_info from the generic dst cache, bound to
 * ip6_dst_ops so the IPv6 callbacks above are used for its lifecycle.
 */
140 static __inline__ struct rt6_info *ip6_dst_alloc(void)
142 	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
/* dst_ops.destroy: drop this route's reference on its inet6_dev.
 * NOTE(review): the in6_dev_put() call on idev is elided in this
 * extract; only the detach (rt6i_idev = NULL) is visible.
 */
145 static void ip6_dst_destroy(struct dst_entry *dst)
147 	struct rt6_info *rt = (struct rt6_info *)dst;
148 	struct inet6_dev *idev = rt->rt6i_idev;
151 rt->rt6i_idev = NULL;
/* dst_ops.ifdown: on device shutdown just run the destroy path to
 * release the per-device state ('how' is ignored here).
 */
156 static void ip6_dst_ifdown(struct dst_entry *dst, int how)
158 	ip6_dst_destroy(dst);
162 * Route lookup. Any rt6_lock is implied.
/* Walk the leaf chain starting at 'rt' and pick the entry whose device
 * matches 'oif' (loopback devices are tracked separately via 'local').
 * Falls through to &ip6_null_entry when nothing acceptable is found.
 * Caller must hold rt6_lock (see "Route lookup" comment above).
 * NOTE(review): the strict/local fallback logic between the loop and
 * the final return is elided in this extract.
 */
165 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
169 	struct rt6_info *local = NULL;
170 	struct rt6_info *sprt;
173 	for (sprt = rt; sprt; sprt = sprt->u.next) {
174 		struct net_device *dev = sprt->rt6i_dev;
175 		if (dev->ifindex == oif)
177 		if (dev->flags&IFF_LOOPBACK)
185 	return &ip6_null_entry;
191 * pointer to the last default router chosen. BH is disabled locally.
/* Sticky pointer to the last default router chosen by rt6_best_dflt(),
 * guarded by rt6_dflt_lock (callers run with BH disabled, per the
 * comment above).
 */
193 static struct rt6_info *rt6_dflt_pointer;
194 static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
196 /* Default Router Selection (RFC 2461 6.3.6) */
/* Default Router Selection per RFC 2461 6.3.6 (see header comment:
 * respect oif, prefer reachable NUD states, stick to the previously
 * chosen router, otherwise round-robin).
 * Pass 1: score each default route by neighbour NUD state, keeping the
 *         best in 'match' (scores and several branches are elided here).
 * Pass 2 (no reachable router): under rt6_dflt_lock, round-robin from
 *         rt6_dflt_pointer over entries that are not obsolete/errored,
 *         and record the new choice in rt6_dflt_pointer.
 * Pass 3 (last resort): scan the whole table leaf chain for any
 *         RTF_DEFAULT route matching oif; else return &ip6_null_entry.
 */
197 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
199 	struct rt6_info *match = NULL;
200 	struct rt6_info *sprt;
203 	for (sprt = rt; sprt; sprt = sprt->u.next) {
204 		struct neighbour *neigh;
209 sprt->rt6i_dev->ifindex == oif))
212 if (sprt == rt6_dflt_pointer)
215 if ((neigh = sprt->rt6i_nexthop) != NULL) {
216 	read_lock_bh(&neigh->lock);
217 	switch (neigh->nud_state) {
235 read_unlock_bh(&neigh->lock);
238 read_unlock_bh(&neigh->lock);
243 if (m > mpri || m >= 12) {
247 /* we choose the last default router if it
248 * is in (probably) reachable state.
249 * If route changed, we should do pmtu
250 * discovery. --yoshfuji
257 spin_lock(&rt6_dflt_lock);
260 * No default routers are known to be reachable.
263 if (rt6_dflt_pointer) {
264 	for (sprt = rt6_dflt_pointer->u.next;
265 	     sprt; sprt = sprt->u.next) {
266 		if (sprt->u.dst.obsolete <= 0 &&
267 		    sprt->u.dst.error == 0) {
274 sprt = sprt->u.next) {
275 if (sprt->u.dst.obsolete <= 0 &&
276     sprt->u.dst.error == 0) {
280 if (sprt == rt6_dflt_pointer)
287 if (rt6_dflt_pointer != match)
288 	RT6_TRACE("changed default router: %p->%p\n",
289 		  rt6_dflt_pointer, match);
290 rt6_dflt_pointer = match;
292 spin_unlock(&rt6_dflt_lock);
296 * Last Resort: if no default routers found,
297 * use addrconf default route.
298 * We don't record this route.
300 for (sprt = ip6_routing_table.leaf;
301      sprt; sprt = sprt->u.next) {
302 	if ((sprt->rt6i_flags & RTF_DEFAULT) &&
305 	    sprt->rt6i_dev->ifindex == oif))) {
311 /* no default route. give up. */
312 match = &ip6_null_entry;
/* Public lookup: find the best route for daddr/saddr restricted to
 * 'oif'. Takes rt6_lock for reading, holds a reference on the result
 * before dropping the lock, stamps lastuse, and returns the route on
 * success; on error (dst.error != 0) the reference is released.
 * NOTE(review): the success-return and the final error-path return are
 * elided in this extract.
 */
319 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
322 	struct fib6_node *fn;
325 	read_lock_bh(&rt6_lock);
326 	fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
327 	rt = rt6_device_match(fn->leaf, oif, strict);
328 	dst_hold(&rt->u.dst);
330 	read_unlock_bh(&rt6_lock);
332 	rt->u.dst.lastuse = jiffies;
333 	if (rt->u.dst.error == 0)
335 	dst_release(&rt->u.dst);
339 /* rt6_ins is called with FREE rt6_lock.
340 It takes new route entry, the addition fails by any reason the
341 route is freed. In any case, if caller does not hold it, it may
345 static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
349 	/* Serialize FIB modification against all readers/writers. */
349 	write_lock_bh(&rt6_lock);
350 	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
351 	write_unlock_bh(&rt6_lock);
356 /* No rt6_lock! If COW failed, the function returns dead route entry
357 with dst->error set to errno value.
360 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
361 				struct in6_addr *saddr)
370 	/* Clone 'ort' into a host (/128) RTF_CACHE entry for daddr. */
370 	rt = ip6_rt_copy(ort);
373 	ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
375 	/* Non-gateway routes are on-link: the destination is the nexthop. */
375 	if (!(rt->rt6i_flags&RTF_GATEWAY))
376 		ipv6_addr_copy(&rt->rt6i_gateway, daddr);
378 	rt->rt6i_dst.plen = 128;
379 	rt->rt6i_flags |= RTF_CACHE;
380 	rt->u.dst.flags |= DST_HOST;
382 #ifdef CONFIG_IPV6_SUBTREES
383 	if (rt->rt6i_src.plen && saddr) {
384 		ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
385 		rt->rt6i_src.plen = 128;
389 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
391 	dst_hold(&rt->u.dst);
393 	/* Insert the clone; on failure the error is stored in the dst. */
393 	err = rt6_ins(rt, NULL, NULL);
397 	rt->u.dst.error = err;
401 	/* Allocation-failure path: hand back the shared null entry. */
401 	dst_hold(&ip6_null_entry.u.dst);
402 	return &ip6_null_entry;
/* Lookup helper macro: when a strict lookup hit the null entry, walk
 * back up the fib6 tree (fn->parent) looking for an ancestor node that
 * carries route info (RTN_RTINFO); give up at the root (RTN_ROOT).
 * Relies on 'rt', 'fn' and 'strict' in the caller's scope.
 * NOTE(review): the statements executed after the RTN_ROOT/RTN_RTINFO
 * tests are elided in this extract.
 */
405 #define BACKTRACK() \
406 if (rt == &ip6_null_entry && strict) { \
407 	while ((fn = fn->parent) != NULL) { \
408 		if (fn->fn_flags & RTN_ROOT) { \
409 			dst_hold(&rt->u.dst); \
412 if (fn->fn_flags & RTN_RTINFO) \
/* Route an incoming packet: look up by the packet's destination (and
 * source, for subtrees), honoring strict device matching for
 * multicast/link-local destinations. Cached (RTF_CACHE) entries are
 * used directly; otherwise a clone is created via rt6_cow() for
 * next-hop-less, non-NONEXTHOP routes, retrying on -EEXIST races
 * (another CPU may insert the same clone while rt6_lock is dropped).
 * The result is attached to skb->dst with a reference held.
 */
418 void ip6_route_input(struct sk_buff *skb)
420 	struct fib6_node *fn;
425 	strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
428 	read_lock_bh(&rt6_lock);
430 	fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
431 			 &skb->nh.ipv6h->saddr);
436 	if ((rt->rt6i_flags & RTF_CACHE)) {
437 		rt = rt6_device_match(rt, skb->dev->ifindex, strict);
439 		dst_hold(&rt->u.dst);
443 	rt = rt6_device_match(rt, skb->dev->ifindex, 0);
446 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
447 		read_unlock_bh(&rt6_lock);
449 		rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
450 			     &skb->nh.ipv6h->saddr);
452 		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
454 		/* Race condition! In the gap, when rt6_lock was
455 		released someone could insert this route. Relookup.
457 		dst_release(&rt->u.dst);
460 	dst_hold(&rt->u.dst);
463 	read_unlock_bh(&rt6_lock);
465 	rt->u.dst.lastuse = jiffies;
467 	skb->dst = (struct dst_entry *) rt;
/* Route a locally originated packet described by flow 'fl'. Mirrors
 * ip6_route_input(), with one extra step: RTF_DEFAULT routes above
 * the addrconf priority go through rt6_best_dflt() for RFC 2461
 * default router selection. Same rt6_cow()/-EEXIST retry dance for
 * clone creation. Returns the held dst entry.
 * NOTE(review): the final return statement is elided in this extract.
 */
470 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
472 	struct fib6_node *fn;
477 	strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
480 	read_lock_bh(&rt6_lock);
482 	fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
487 	if ((rt->rt6i_flags & RTF_CACHE)) {
488 		rt = rt6_device_match(rt, fl->oif, strict);
490 		dst_hold(&rt->u.dst);
493 	if (rt->rt6i_flags & RTF_DEFAULT) {
494 		if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
495 			rt = rt6_best_dflt(rt, fl->oif);
497 		rt = rt6_device_match(rt, fl->oif, strict);
501 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
502 		read_unlock_bh(&rt6_lock);
504 		rt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
506 		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
509 		/* Race condition! In the gap, when rt6_lock was
510 		released someone could insert this route. Relookup.
512 		dst_release(&rt->u.dst);
515 	dst_hold(&rt->u.dst);
518 	read_unlock_bh(&rt6_lock);
520 	rt->u.dst.lastuse = jiffies;
527 * Destination cache support functions
/* dst_ops.check: a cached dst is still valid only while its fib6 node
 * exists and the node's serial number matches the caller's cookie
 * (the serial bumps when the tree changes, invalidating old dsts).
 */
530 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
534 	rt = (struct rt6_info *) dst;
536 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/* dst_ops.negative_advice: if the caller reports this dst as bad and
 * it is a cache clone, remove it from the table outright.
 */
543 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
545 	struct rt6_info *rt = (struct rt6_info *) dst;
548 	if (rt->rt6i_flags & RTF_CACHE)
549 		ip6_del_rt(rt, NULL, NULL);
/* dst_ops.link_failure: notify the sender with an ICMPv6 address-
 * unreachable error, then age out the route: cache clones get an
 * immediate expiry, while default routes have their node's serial
 * forced to -1 so ip6_dst_check() invalidates every dst cached on it.
 */
556 static void ip6_link_failure(struct sk_buff *skb)
560 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
562 	rt = (struct rt6_info *) skb->dst;
564 	if (rt->rt6i_flags&RTF_CACHE) {
565 		dst_set_expires(&rt->u.dst, 0);
566 		rt->rt6i_flags |= RTF_EXPIRES;
567 	} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
568 		rt->rt6i_node->fn_sernum = -1;
/* dst_ops.update_pmtu: lower the cached path MTU on a host (/128)
 * route when a smaller MTU is learned; marks the route RTF_MODIFIED.
 * NOTE(review): the branch taken when mtu < IPV6_MIN_MTU is elided in
 * this extract, so the clamping behavior cannot be confirmed here.
 */
572 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
574 	struct rt6_info *rt6 = (struct rt6_info*)dst;
576 	if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
577 		rt6->rt6i_flags |= RTF_MODIFIED;
578 		if (mtu < IPV6_MIN_MTU)
580 		dst->metrics[RTAX_MTU-1] = mtu;
584 /* Protected by rt6_lock. */
585 static struct dst_entry *ndisc_dst_gc_list;
586 static int ipv6_get_mtu(struct net_device *dev);
587 static inline unsigned int ipv6_advmss(unsigned int mtu);
/* Build a standalone RTF_LOCAL dst for neighbour-discovery traffic on
 * 'dev' toward 'addr', with the given output handler. The entry is not
 * inserted into the FIB; instead it is chained onto ndisc_dst_gc_list
 * (under rt6_lock, per the "Protected by rt6_lock" comment above) so
 * ndisc_dst_gc() can reap it once its refcount drops, and the fib gc
 * timer is kicked to make that happen.
 */
589 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
590 				  struct neighbour *neigh,
591 				  struct in6_addr *addr,
592 				  int (*output)(struct sk_buff **))
594 	struct rt6_info *rt = ip6_dst_alloc();
596 	if (unlikely(rt == NULL))
603 	neigh = ndisc_get_neigh(dev, addr);
606 	rt->rt6i_idev = in6_dev_get(dev);
607 	rt->rt6i_nexthop = neigh;
608 	rt->rt6i_expires = 0;
609 	rt->rt6i_flags = RTF_LOCAL;
611 	atomic_set(&rt->u.dst.__refcnt, 1);
612 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
613 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
614 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
615 	rt->u.dst.output = output;
617 	write_lock_bh(&rt6_lock);
618 	rt->u.dst.next = ndisc_dst_gc_list;
619 	ndisc_dst_gc_list = &rt->u.dst;
620 	write_unlock_bh(&rt6_lock);
622 	fib6_force_start_gc();
625 	return (struct dst_entry *)rt;
/* Reap unreferenced entries from ndisc_dst_gc_list: walk the singly
 * linked list via a pointer-to-pointer and free entries whose refcount
 * reached zero. NOTE(review): the unlink/free statements and the use
 * of 'more' are elided in this extract.
 */
628 int ndisc_dst_gc(int *more)
630 	struct dst_entry *dst, *next, **pprev;
634 	pprev = &ndisc_dst_gc_list;
636 	while ((dst = *pprev) != NULL) {
637 		if (!atomic_read(&dst->__refcnt)) {
/* dst cache garbage collector with an adaptive 'expire' age: skip GC
 * entirely if it ran within ip6_rt_gc_min_interval and the cache is
 * under ip6_rt_max_size; reset the age to half the timeout when the
 * cache is below gc_thresh, otherwise shrink it geometrically by
 * the elasticity shift so GC gets progressively more aggressive.
 * Returns nonzero while the cache is still above ip6_rt_max_size.
 */
650 static int ip6_dst_gc(void)
652 	static unsigned expire = 30*HZ;
653 	static unsigned long last_gc;
654 	unsigned long now = jiffies;
656 	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
657 	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
663 	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
664 		expire = ip6_rt_gc_timeout>>1;
667 	expire -= expire>>ip6_rt_gc_elasticity;
668 	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
671 /* Clean host part of a prefix. Not necessary in radix tree,
672 but results in cleaner routing tables.
674 Remove it only when all the things will work!
/* Return the device's IPv6 MTU (idev->cnf.mtu6), defaulting to
 * IPV6_MIN_MTU when the device has no inet6_dev.
 * NOTE(review): the matching in6_dev_put() is elided in this extract.
 */
677 static int ipv6_get_mtu(struct net_device *dev)
679 	int mtu = IPV6_MIN_MTU;
680 	struct inet6_dev *idev;
682 	idev = in6_dev_get(dev);
684 	mtu = idev->cnf.mtu6;
/* Derive the advertised TCP MSS from a path MTU: subtract IPv6 and TCP
 * header sizes, clamp below at ip6_rt_min_advmss, and cap at the
 * maximal non-jumbo payload (see the comment in the body — IPV6_MAXPLEN
 * means "any MSS, rely only on pmtu discovery").
 */
690 static inline unsigned int ipv6_advmss(unsigned int mtu)
692 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
694 	if (mtu < ip6_rt_min_advmss)
695 		mtu = ip6_rt_min_advmss;
698 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
699 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
700 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
701 	 * rely only on pmtu discovery"
703 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/* Return the device's configured hop limit, defaulting to the global
 * ipv6_devconf.hop_limit when the device has no inet6_dev.
 * NOTE(review): the matching in6_dev_put() is elided in this extract.
 */
708 static int ipv6_get_hoplimit(struct net_device *dev)
710 	int hoplimit = ipv6_devconf.hop_limit;
711 	struct inet6_dev *idev;
713 	idev = in6_dev_get(dev);
715 	hoplimit = idev->cnf.hop_limit;
/* Add a route described by an in6_rtmsg (ioctl path) or netlink
 * message. Validates prefix lengths, resolves the device, allocates
 * the rt6_info, fills destination/source prefixes, handles reject and
 * loopback promotion, validates and resolves the gateway (must be a
 * link-local unicast unless the recursive-lookup exception applies),
 * binds the nexthop neighbour, applies RTA_METRICS and defaulted
 * hoplimit/MTU/advmss metrics, and finally inserts via rt6_ins().
 * On failure the partially built dst is freed (dst_free at the end).
 */
725 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
731 	struct net_device *dev = NULL;
734 	rta = (struct rtattr **) _rtattr;
736 	/* Prefix lengths beyond /128 are invalid for IPv6. */
736 	if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
738 #ifndef CONFIG_IPV6_SUBTREES
739 	/* Source routing entries require subtree support. */
739 	if (rtmsg->rtmsg_src_len)
742 	if (rtmsg->rtmsg_ifindex) {
743 		dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
748 	if (rtmsg->rtmsg_metric == 0)
749 		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
751 	rt = ip6_dst_alloc();
756 	rt->u.dst.obsolete = -1;
757 	rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
758 	if (nlh && (r = NLMSG_DATA(nlh))) {
759 		rt->rt6i_protocol = r->rtm_protocol;
761 	rt->rt6i_protocol = RTPROT_BOOT;
764 	addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
766 	if (addr_type & IPV6_ADDR_MULTICAST)
767 		rt->u.dst.input = ip6_mc_input;
769 	rt->u.dst.input = ip6_forward;
771 	rt->u.dst.output = ip6_output;
773 	ipv6_addr_prefix(&rt->rt6i_dst.addr,
774 			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
775 	rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
776 	if (rt->rt6i_dst.plen == 128)
777 		rt->u.dst.flags = DST_HOST;
779 #ifdef CONFIG_IPV6_SUBTREES
780 	ipv6_addr_prefix(&rt->rt6i_src.addr,
781 			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
782 	rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
785 	rt->rt6i_metric = rtmsg->rtmsg_metric;
787 	/* We cannot add true routes via loopback here,
788 	they would result in kernel looping; promote them to reject routes
790 	if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
791 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
796 		rt->u.dst.output = ip6_pkt_discard_out;
797 		rt->u.dst.input = ip6_pkt_discard;
798 		rt->u.dst.error = -ENETUNREACH;
799 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
803 	if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
804 		struct in6_addr *gw_addr;
807 		gw_addr = &rtmsg->rtmsg_gateway;
808 		ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
809 		gwa_type = ipv6_addr_type(gw_addr);
811 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
812 			struct rt6_info *grt;
814 			/* IPv6 strictly inhibits using not link-local
815 			addresses as nexthop address.
816 			Otherwise, router will not able to send redirects.
817 			It is very good, but in some (rare!) circumstances
818 			(SIT, PtP, NBMA NOARP links) it is handy to allow
819 			some exceptions. --ANK
822 			if (!(gwa_type&IPV6_ADDR_UNICAST))
825 			grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
831 			if (dev != grt->rt6i_dev) {
832 				dst_release(&grt->u.dst);
839 			if (!(grt->rt6i_flags&RTF_GATEWAY))
841 			dst_release(&grt->u.dst);
847 	if (dev == NULL || (dev->flags&IFF_LOOPBACK))
855 	if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
856 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
857 		if (IS_ERR(rt->rt6i_nexthop)) {
858 			err = PTR_ERR(rt->rt6i_nexthop);
859 			rt->rt6i_nexthop = NULL;
864 	rt->rt6i_flags = rtmsg->rtmsg_flags;
867 	/* Copy user-supplied per-route metrics (RTA_METRICS nest). */
867 	if (rta && rta[RTA_METRICS-1]) {
868 		int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
869 		struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
871 		while (RTA_OK(attr, attrlen)) {
872 			unsigned flavor = attr->rta_type;
874 			if (flavor > RTAX_MAX) {
878 			rt->u.dst.metrics[flavor-1] =
879 				*(u32 *)RTA_DATA(attr);
881 			attr = RTA_NEXT(attr, attrlen);
885 	/* Fill in defaults for metrics the user left unset. */
885 	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
886 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
887 			rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
888 				IPV6_DEFAULT_MCASTHOPS;
890 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
891 			ipv6_get_hoplimit(dev);
894 	if (!rt->u.dst.metrics[RTAX_MTU-1])
895 		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
896 	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
897 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
899 	rt->rt6i_idev = in6_dev_get(dev);
900 	return rt6_ins(rt, nlh, _rtattr);
905 	/* Error path: release the half-built entry. */
905 	dst_free((struct dst_entry *) rt);
/* Delete 'rt' from the FIB under the write lock. The cached default-
 * router pointer is cleared first (under its own spinlock) in case it
 * references the route being removed, then the caller's reference is
 * dropped and fib6_del() does the actual unlink.
 */
909 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
913 	write_lock_bh(&rt6_lock);
915 	spin_lock_bh(&rt6_dflt_lock);
916 	rt6_dflt_pointer = NULL;
917 	spin_unlock_bh(&rt6_dflt_lock);
919 	dst_release(&rt->u.dst);
921 	err = fib6_del(rt, nlh, _rtattr);
922 	write_unlock_bh(&rt6_lock);
/* Delete the route matching the request: locate the fib6 node for the
 * dst/src prefixes, then scan its leaf chain for an entry that matches
 * the requested ifindex, gateway (when RTF_GATEWAY is set) and metric
 * — each criterion only constrains when the request specifies it.
 * On a match, take a reference, drop the read lock, and hand off to
 * ip6_del_rt() (which re-takes rt6_lock for writing).
 */
927 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
929 	struct fib6_node *fn;
933 	read_lock_bh(&rt6_lock);
935 	fn = fib6_locate(&ip6_routing_table,
936 			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
937 			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
940 	for (rt = fn->leaf; rt; rt = rt->u.next) {
941 		if (rtmsg->rtmsg_ifindex &&
942 		    (rt->rt6i_dev == NULL ||
943 		     rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
945 		if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
946 		    ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
948 		if (rtmsg->rtmsg_metric &&
949 		    rtmsg->rtmsg_metric != rt->rt6i_metric)
951 		dst_hold(&rt->u.dst);
952 		read_unlock_bh(&rt6_lock);
954 		return ip6_del_rt(rt, nlh, _rtattr);
957 	read_unlock_bh(&rt6_lock);
/* Handle an ICMPv6 Redirect for 'dest' received from 'saddr' via
 * 'neigh'. Validates the redirect (route exists on the receiving
 * device, route is a gateway route, sender is the current nexthop —
 * with a fallback scan of default routers, see the RFC 2461 comment
 * in the body), confirms the old nexthop, then installs a /128
 * RTF_CACHE clone pointing at the new nexthop, resetting its PMTU to
 * the device MTU. A pre-existing cache entry for the old path is
 * removed. 'on_link' selects whether the clone keeps RTF_GATEWAY.
 */
965 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
966 		  struct neighbour *neigh, int on_link)
968 	struct rt6_info *rt, *nrt;
970 	/* Locate old route to this destination. */
971 	rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
976 	if (neigh->dev != rt->rt6i_dev)
979 	/* Redirect received -> path was valid.
980 	Look, redirects are sent only in response to data packets,
981 	so that this nexthop apparently is reachable. --ANK
983 	dst_confirm(&rt->u.dst);
985 	/* Duplicate redirect: silently ignore. */
986 	if (neigh == rt->u.dst.neighbour)
989 	/* Current route is on-link; redirect is always invalid.
991 	Seems, previous statement is not true. It could
992 	be node, which looks for us as on-link (f.e. proxy ndisc)
993 	But then router serving it might decide, that we should
994 	know truth 8)8) --ANK (980726).
996 	if (!(rt->rt6i_flags&RTF_GATEWAY))
1000 	 * RFC 2461 specifies that redirects should only be
1001 	 * accepted if they come from the nexthop to the target.
1002 	 * Due to the way default routers are chosen, this notion
1003 	 * is a bit fuzzy and one might need to check all default
1007 	if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
1008 		if (rt->rt6i_flags & RTF_DEFAULT) {
1009 			struct rt6_info *rt1;
1011 			read_lock(&rt6_lock);
1012 			for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1013 				if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
1014 					dst_hold(&rt1->u.dst);
1015 					dst_release(&rt->u.dst);
1016 					read_unlock(&rt6_lock);
1021 			read_unlock(&rt6_lock);
1023 		if (net_ratelimit())
1024 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1025 			       "for redirect target\n");
1032 	 * We have finally decided to accept it.
1035 	nrt = ip6_rt_copy(rt);
1039 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1041 	nrt->rt6i_flags &= ~RTF_GATEWAY;
1043 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1044 	nrt->rt6i_dst.plen = 128;
1045 	nrt->u.dst.flags |= DST_HOST;
1047 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1048 	nrt->rt6i_nexthop = neigh_clone(neigh);
1049 	/* Reset pmtu, it may be better */
1050 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1051 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
1053 	if (rt6_ins(nrt, NULL, NULL))
1056 	if (rt->rt6i_flags&RTF_CACHE) {
1057 		ip6_del_rt(rt, NULL, NULL);
1062 	dst_release(&rt->u.dst);
1067 * Handle ICMP "packet too big" messages
1068 * i.e. Path MTU discovery
/* Handle ICMPv6 "Packet Too Big": record the new path MTU for daddr.
 * MTUs below IPV6_MIN_MTU are clamped up per RFC 1981 (see comment in
 * body). Increases are ignored (pmtu >= current). Then:
 *  - RTF_CACHE host route: update its MTU in place and let it expire
 *    after ip6_rt_mtu_expires so a larger PMTU can be re-probed;
 *  - connected route: COW a host clone via rt6_cow() and set the MTU
 *    and expiry on the clone;
 *  - gatewayed/NONEXTHOP route: hand-clone via ip6_rt_copy(), reusing
 *    the original nexthop neighbour, and insert the clone.
 */
1071 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1072 			struct net_device *dev, u32 pmtu)
1074 	struct rt6_info *rt, *nrt;
1076 	if (pmtu < IPV6_MIN_MTU) {
1077 		if (net_ratelimit())
1078 			printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1080 		/* According to RFC1981, the PMTU is set to the IPv6 minimum
1081 		link MTU if the node receives a Packet Too Big message
1082 		reporting next-hop MTU that is less than the IPv6 minimum MTU.
1084 		pmtu = IPV6_MIN_MTU;
1087 	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1092 	if (pmtu >= dst_pmtu(&rt->u.dst))
1095 	/* New mtu received -> path was valid.
1096 	They are sent only in response to data packets,
1097 	so that this nexthop apparently is reachable. --ANK
1099 	dst_confirm(&rt->u.dst);
1101 	/* Host route. If it is static, it would be better
1102 	not to override it, but add new one, so that
1103 	when cache entry will expire old pmtu
1104 	would return automatically.
1106 	if (rt->rt6i_flags & RTF_CACHE) {
1107 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1108 		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1109 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1114 	Two cases are possible:
1115 	1. It is connected route. Action: COW
1116 	2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1118 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1119 		nrt = rt6_cow(rt, daddr, saddr);
1120 		if (!nrt->u.dst.error) {
1121 			nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1122 			/* According to RFC 1981, detecting PMTU increase shouldn't be
1123 			happened within 5 mins, the recommended timer is 10 mins.
1124 			Here this route expiration time is set to ip6_rt_mtu_expires
1125 			which is 10 mins. After 10 mins the decreased pmtu is expired
1126 			and detecting PMTU increase will be automatically happened.
1128 			dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1129 			nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1131 		dst_release(&nrt->u.dst);
1133 		nrt = ip6_rt_copy(rt);
1136 		ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1137 		nrt->rt6i_dst.plen = 128;
1138 		nrt->u.dst.flags |= DST_HOST;
1139 		nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1140 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1141 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1142 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1143 		rt6_ins(nrt, NULL, NULL);
1147 	dst_release(&rt->u.dst);
1151 * Misc support functions
/* Shallow-copy 'ort' into a fresh rt6_info: handlers, metrics, device
 * (+ref), inet6_dev (+ref), gateway and dst/src keys. The copy never
 * inherits an expiry (rt6i_expires = 0, RTF_EXPIRES cleared) and gets
 * metric 0; callers then specialize it (cache clone, redirect, pmtu).
 */
1154 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1156 	struct rt6_info *rt = ip6_dst_alloc();
1159 	rt->u.dst.input = ort->u.dst.input;
1160 	rt->u.dst.output = ort->u.dst.output;
1162 	memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1163 	rt->u.dst.dev = ort->u.dst.dev;
1165 	dev_hold(rt->u.dst.dev);
1166 	rt->rt6i_idev = ort->rt6i_idev;
1168 	in6_dev_hold(rt->rt6i_idev);
1169 	rt->u.dst.lastuse = jiffies;
1170 	rt->rt6i_expires = 0;
1172 	ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1173 	rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1174 	rt->rt6i_metric = 0;
1176 	memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1177 #ifdef CONFIG_IPV6_SUBTREES
1178 	memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
/* Find the default-router entry for gateway 'addr' on 'dev' by
 * scanning the root node's leaf chain under the write lock; a matching
 * entry is returned with a reference held (NULL 'rt' falls through).
 */
1184 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1186 	struct rt6_info *rt;
1187 	struct fib6_node *fn;
1189 	fn = &ip6_routing_table;
1191 	write_lock_bh(&rt6_lock);
1192 	for (rt = fn->leaf; rt; rt=rt->u.next) {
1193 		if (dev == rt->rt6i_dev &&
1194 		    ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1198 	dst_hold(&rt->u.dst);
1199 	write_unlock_bh(&rt6_lock);
/* Install a default route via 'gwaddr' on 'dev' (metric 1024, flags
 * RTF_GATEWAY|RTF_ADDRCONF|RTF_DEFAULT|RTF_UP) by synthesizing an
 * in6_rtmsg for ip6_route_add(), then look the new entry back up to
 * return it with a reference. NOTE(review): the ip6_route_add() return
 * value is intentionally ignored here; the lookup reports the outcome.
 */
1203 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1204 				     struct net_device *dev)
1206 	struct in6_rtmsg rtmsg;
1208 	memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1209 	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1210 	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1211 	rtmsg.rtmsg_metric = 1024;
1212 	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1214 	rtmsg.rtmsg_ifindex = dev->ifindex;
1216 	ip6_route_add(&rtmsg, NULL, NULL);
1217 	return rt6_get_dflt_router(gwaddr, dev);
/* Remove all default routers from the table: flag set selected by
 * 'last_resort' (RTF_ALLONLINK vs RTF_DEFAULT|RTF_ADDRCONF). For each
 * match: hold a reference, clear the cached default-router pointer,
 * drop the read lock, delete via ip6_del_rt(), and (per the restart
 * pattern implied by the two read_unlock paths) rescan from the top
 * since the chain may have changed while unlocked.
 */
1220 void rt6_purge_dflt_routers(int last_resort)
1222 	struct rt6_info *rt;
1226 	flags = RTF_ALLONLINK;
1228 	flags = RTF_DEFAULT | RTF_ADDRCONF;
1231 	read_lock_bh(&rt6_lock);
1232 	for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1233 		if (rt->rt6i_flags & flags) {
1234 			dst_hold(&rt->u.dst);
1236 			spin_lock_bh(&rt6_dflt_lock);
1237 			rt6_dflt_pointer = NULL;
1238 			spin_unlock_bh(&rt6_dflt_lock);
1240 			read_unlock_bh(&rt6_lock);
1242 			ip6_del_rt(rt, NULL, NULL);
1247 	read_unlock_bh(&rt6_lock);
/* ioctl entry point for SIOCADDRT/SIOCDELRT: requires CAP_NET_ADMIN,
 * copies the in6_rtmsg from userspace, then dispatches to
 * ip6_route_add() or ip6_route_del().
 */
1250 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1252 	struct in6_rtmsg rtmsg;
1256 	case SIOCADDRT:		/* Add a route */
1257 	case SIOCDELRT:		/* Delete a route */
1258 		if (!capable(CAP_NET_ADMIN))
1260 		err = copy_from_user(&rtmsg, arg,
1261 				     sizeof(struct in6_rtmsg));
1268 		err = ip6_route_add(&rtmsg, NULL, NULL);
1271 		err = ip6_route_del(&rtmsg, NULL, NULL);
1285 * Drop the packet on the floor
/* Input handler for reject/null routes: count the no-route event and
 * notify the sender with ICMPv6 destination-unreachable (no route).
 */
1288 int ip6_pkt_discard(struct sk_buff *skb)
1290 	IP6_INC_STATS(OutNoRoutes);
1291 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
/* Output-path twin of ip6_pkt_discard(): unwrap the skb** and discard. */
1296 int ip6_pkt_discard_out(struct sk_buff **pskb)
1298 	return ip6_pkt_discard(*pskb);
/* Install the host (/128) route for a local address: a loopback-device
 * route with ip6_input/ip6_output handlers, device-derived MTU/advmss/
 * hoplimit metrics, flags RTF_UP|RTF_NONEXTHOP (plus RTF_LOCAL — the
 * 'anycast' distinction is in elided lines), and a loopback neighbour
 * as nexthop. Fails and frees the entry if no neighbour is available;
 * otherwise inserts via rt6_ins().
 */
1305 int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast)
1307 	struct rt6_info *rt = ip6_dst_alloc();
1312 	dev_hold(&loopback_dev);
1314 	rt->u.dst.flags = DST_HOST;
1315 	rt->u.dst.input = ip6_input;
1316 	rt->u.dst.output = ip6_output;
1317 	rt->rt6i_dev = &loopback_dev;
1318 	rt->rt6i_idev = in6_dev_get(&loopback_dev);
1319 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1320 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
1321 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
1322 	rt->u.dst.obsolete = -1;
1324 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1326 	rt->rt6i_flags |= RTF_LOCAL;
1327 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1328 	if (rt->rt6i_nexthop == NULL) {
1329 		dst_free((struct dst_entry *) rt);
1333 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1334 	rt->rt6i_dst.plen = 128;
1335 	rt6_ins(rt, NULL, NULL);
1340 /* Delete address. Warning: you should check that this address
1341 disappeared before calling this function.
/* Remove the local host route for 'addr': strict lookup on loopback,
 * delete only if it is really a /128 entry, then drop the lookup ref.
 * (See the warning comment above: the address must already be gone.)
 */
1344 int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
1346 	struct rt6_info *rt;
1349 	rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
1351 	if (rt->rt6i_dst.plen == 128)
1352 		err = ip6_del_rt(rt, NULL, NULL);
1354 	dst_release(&rt->u.dst);
/* fib6_clean_tree() callback for rt6_ifdown(): select for deletion any
 * route on the given device (arg == NULL means every device), never
 * the shared null entry.
 */
1360 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1362 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1363 	    rt != &ip6_null_entry) {
1364 		RT6_TRACE("deleted by ifdown %p\n", rt);
/* Purge every route bound to 'dev' (device going down): walk the whole
 * tree with fib6_ifdown() under the write lock.
 */
1370 void rt6_ifdown(struct net_device *dev)
1372 	write_lock_bh(&rt6_lock);
1373 	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1374 	write_unlock_bh(&rt6_lock);
1377 struct rt6_mtu_change_arg
1379 struct net_device *dev;
/* fib6_clean_tree() callback for rt6_mtu_change(): for each route on
 * the changed device whose RTAX_MTU metric is not locked, update the
 * route PMTU when the device MTU decreased below it, or when it
 * increased and the route's PMTU equalled the old device MTU (i.e.
 * this hop was the path bottleneck) — rationale in the long comment
 * in the body. The advmss metric is recomputed alongside.
 * NOTE(review): lines around the MTU/ADVMSS assignments are elided in
 * this extract, so whether the ADVMSS update shares the condition
 * (braces) cannot be confirmed here — worth checking in the full file.
 */
1383 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1385 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1386 	struct inet6_dev *idev;
1388 	/* In IPv6 pmtu discovery is not optional,
1389 	so that RTAX_MTU lock cannot disable it.
1390 	We still use this lock to block changes
1391 	caused by addrconf/ndisc.
1394 	idev = __in6_dev_get(arg->dev);
1398 	/* For administrative MTU increase, there is no way to discover
1399 	IPv6 PMTU increase, so PMTU increase should be updated here.
1400 	Since RFC 1981 doesn't include administrative MTU increase
1401 	update PMTU increase is a MUST. (i.e. jumbo frame)
1404 	If new MTU is less than route PMTU, this new MTU will be the
1405 	lowest MTU in the path, update the route PMTU to reflect PMTU
1406 	decreases; if new MTU is greater than route PMTU, and the
1407 	old MTU is the lowest MTU in the path, update the route PMTU
1408 	to reflect the increase. In this case if the other nodes' MTU
1409 	also have the lowest MTU, TOO BIG MESSAGE will be lead to
1412 	if (rt->rt6i_dev == arg->dev &&
1413 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1414 	    (dst_pmtu(&rt->u.dst) > arg->mtu ||
1415 	     (dst_pmtu(&rt->u.dst) < arg->mtu &&
1416 	      dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
1417 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1418 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
/* Propagate a device MTU change to every affected route by walking the
 * tree with rt6_mtu_change_route() (read lock suffices: only metrics
 * are updated, the tree structure is untouched).
 */
1422 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1424 	struct rt6_mtu_change_arg arg;
1428 	read_lock_bh(&rt6_lock);
1429 	fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1430 	read_unlock_bh(&rt6_lock);
/* Translate a netlink rtmsg + attribute array into the legacy
 * in6_rtmsg used by ip6_route_add()/ip6_route_del(). Each attribute is
 * length-checked before copying: gateway (16 bytes, sets RTF_GATEWAY),
 * dst/src prefixes (ceil(plen/8) bytes), oif (int), priority (4
 * bytes). RTN_UNREACHABLE maps to RTF_REJECT; RTF_UP is always set.
 */
1433 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1434 			      struct in6_rtmsg *rtmsg)
1436 	memset(rtmsg, 0, sizeof(*rtmsg));
1438 	rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1439 	rtmsg->rtmsg_src_len = r->rtm_src_len;
1440 	rtmsg->rtmsg_flags = RTF_UP;
1441 	if (r->rtm_type == RTN_UNREACHABLE)
1442 		rtmsg->rtmsg_flags |= RTF_REJECT;
1444 	if (rta[RTA_GATEWAY-1]) {
1445 		if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1447 		memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1448 		rtmsg->rtmsg_flags |= RTF_GATEWAY;
1450 	if (rta[RTA_DST-1]) {
1451 		if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1453 		memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1455 	if (rta[RTA_SRC-1]) {
1456 		if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1458 		memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1460 	if (rta[RTA_OIF-1]) {
1461 		if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1463 		memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1465 	if (rta[RTA_PRIORITY-1]) {
1466 		if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1468 		memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
/* rtnetlink RTM_DELROUTE handler: convert the netlink message to an
 * in6_rtmsg and delegate to ip6_route_del().
 */
1473 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1475 	struct rtmsg *r = NLMSG_DATA(nlh);
1476 	struct in6_rtmsg rtmsg;
1478 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1480 	return ip6_route_del(&rtmsg, nlh, arg);
/* rtnetlink RTM_NEWROUTE handler: convert the netlink message to an
 * in6_rtmsg and delegate to ip6_route_add().
 */
1483 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1485 	struct rtmsg *r = NLMSG_DATA(nlh);
1486 	struct in6_rtmsg rtmsg;
1488 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1490 	return ip6_route_add(&rtmsg, nlh, arg);
/* State threaded through a netlink dump: the skb being filled and the
 * dump callback (for pid/seq and resume position).
 */
1493 struct rt6_rtnl_dump_arg
1495 	struct sk_buff *skb;
1496 	struct netlink_callback *cb;
/* Serialize one route into a netlink message on 'skb': build the
 * nlmsghdr + rtmsg header, then append RTA_DST/RTA_SRC (forced to /128
 * when explicit dst/src addresses are supplied), RTA_IIF, RTA_PREFSRC,
 * metrics, RTA_GATEWAY, RTA_OIF, RTA_PRIORITY and RTA_CACHEINFO.
 * With 'prefix' set, non-RTF_PREFIX_RT routes are skipped (counted as
 * success). On buffer exhaustion the NLMSG_PUT/RTA_PUT macros jump to
 * the failure labels, which trim the partial message back off the skb.
 * rtm_type/rtm_protocol are derived from the route flags (reject →
 * RTN_UNREACHABLE, loopback dev → RTN_LOCAL; redirect/addrconf/RA
 * protocols per the flag tests below).
 */
1499 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1500 			 struct in6_addr *dst,
1501 			 struct in6_addr *src,
1503 			 int type, u32 pid, u32 seq,
1504 			 struct nlmsghdr *in_nlh, int prefix)
1507 	struct nlmsghdr *nlh;
1508 	unsigned char *b = skb->tail;
1509 	struct rta_cacheinfo ci;
1511 	if (prefix) {	/* user wants prefix routes only */
1512 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1513 			/* success since this is not a prefix route */
1518 	if (!pid && in_nlh) {
1519 		pid = in_nlh->nlmsg_pid;
1522 	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1523 	rtm = NLMSG_DATA(nlh);
1524 	rtm->rtm_family = AF_INET6;
1525 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
1526 	rtm->rtm_src_len = rt->rt6i_src.plen;
1528 	rtm->rtm_table = RT_TABLE_MAIN;
1529 	if (rt->rt6i_flags&RTF_REJECT)
1530 		rtm->rtm_type = RTN_UNREACHABLE;
1531 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1532 		rtm->rtm_type = RTN_LOCAL;
1534 	rtm->rtm_type = RTN_UNICAST;
1536 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1537 	rtm->rtm_protocol = rt->rt6i_protocol;
1538 	if (rt->rt6i_flags&RTF_DYNAMIC)
1539 		rtm->rtm_protocol = RTPROT_REDIRECT;
1540 	else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1541 		rtm->rtm_protocol = RTPROT_KERNEL;
1542 	else if (rt->rt6i_flags&RTF_DEFAULT)
1543 		rtm->rtm_protocol = RTPROT_RA;
1545 	if (rt->rt6i_flags&RTF_CACHE)
1546 		rtm->rtm_flags |= RTM_F_CLONED;
1549 	RTA_PUT(skb, RTA_DST, 16, dst);
1550 	rtm->rtm_dst_len = 128;
1551 	} else if (rtm->rtm_dst_len)
1552 		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1553 #ifdef CONFIG_IPV6_SUBTREES
1555 	RTA_PUT(skb, RTA_SRC, 16, src);
1556 	rtm->rtm_src_len = 128;
1557 	} else if (rtm->rtm_src_len)
1558 		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1561 	RTA_PUT(skb, RTA_IIF, 4, &iif);
1563 	struct in6_addr saddr_buf;
1564 	if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1565 		RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1567 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1568 		goto rtattr_failure;
1569 	if (rt->u.dst.neighbour)
1570 		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1572 	RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1573 	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1574 	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1575 	if (rt->rt6i_expires)
1576 		ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1579 	ci.rta_used = rt->u.dst.__use;
1580 	ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1581 	ci.rta_error = rt->u.dst.error;
1585 	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1586 	nlh->nlmsg_len = skb->tail - b;
1591 	/* Failure path: undo the partial message. */
1591 	skb_trim(skb, b - skb->data);
/*
 * fib6 walker callback for route dumps: emit one route into the dump skb.
 * Honors the RTM_F_PREFIX request flag when the request header carries a
 * full rtmsg payload.
 */
1595 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1597 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1600 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1601 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1602 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1606 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1607 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
/*
 * Walker node callback: dump every route hanging off this fib6 leaf.
 * A negative result from rt6_dump_route means the skb is full and the
 * walk must be suspended (resume logic elided in this view).
 */
1611 static int fib6_dump_node(struct fib6_walker_t *w)
1614 struct rt6_info *rt;
1616 for (rt = w->leaf; rt; rt = rt->u.next) {
1617 res = rt6_dump_route(rt, w->args);
1619 /* Frame is full, suspend walking */
/*
 * Tear down dump state: unlink the walker saved in cb->args[0] and
 * restore the original done() callback stashed in cb->args[1].
 */
1629 static void fib6_dump_end(struct netlink_callback *cb)
1631 struct fib6_walker_t *w = (void*)cb->args[0];
1635 fib6_walker_unlink(w);
1639 cb->done = (void*)cb->args[1];
/*
 * done() hook installed by inet6_dump_fib: clean up (cleanup call elided
 * here) and chain to the original callback restored by fib6_dump_end.
 */
1644 static int fib6_dump_done(struct netlink_callback *cb)
1647 return cb->done(cb);
/*
 * rtnetlink dump entry point for the IPv6 FIB. State survives across
 * calls in cb->args: args[0] holds the fib6 walker, args[1] the caller's
 * original done() callback. First call allocates and starts the walker;
 * subsequent calls resume it under rt6_lock.
 */
1650 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1652 struct rt6_rtnl_dump_arg arg;
1653 struct fib6_walker_t *w;
1659 w = (void*)cb->args[0];
1663 * 1. hook callback destructor.
1665 cb->args[1] = (long)cb->done;
1666 cb->done = fib6_dump_done;
1669 * 2. allocate and initialize walker.
1671 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1674 RT6_TRACE("dump<%p", w);
1675 memset(w, 0, sizeof(*w));
1676 w->root = &ip6_routing_table;
1677 w->func = fib6_dump_node;
1679 cb->args[0] = (long)w;
/* Tree is traversed under the read side of rt6_lock. */
1680 read_lock_bh(&rt6_lock);
1682 read_unlock_bh(&rt6_lock);
/* Resume path: continue a previously suspended walk. */
1685 read_lock_bh(&rt6_lock);
1686 res = fib6_walk_continue(w);
1687 read_unlock_bh(&rt6_lock);
1690 if (res <= 0 && skb->len == 0)
1691 RT6_TRACE("%p>dump end\n", w);
1693 res = res < 0 ? res : skb->len;
1694 /* res < 0 is an error. (really, impossible)
1695 res == 0 means that dump is complete, but skb still can contain data.
1696 res > 0 dump is not complete, but frame is full.
1698 /* Destroy walker, if dump of this table is complete. */
/*
 * rtnetlink RTM_GETROUTE handler: resolve a route for the src/dst/iif/oif
 * given in the request attributes, serialize it with rt6_fill_node and
 * unicast the reply back to the requester. Attribute length validation
 * and several error paths are elided in this view.
 */
1704 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1706 struct rtattr **rta = arg;
1709 struct sk_buff *skb;
1711 struct rt6_info *rt;
1713 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1717 /* Reserve room for dummy headers, this skb can pass
1718 through good chunk of routing engine.
1720 skb->mac.raw = skb->data;
1721 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
/* Build the flow key from the request's RTA_SRC/RTA_DST/RTA_IIF/RTA_OIF. */
1723 memset(&fl, 0, sizeof(fl));
1725 ipv6_addr_copy(&fl.fl6_src,
1726 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1728 ipv6_addr_copy(&fl.fl6_dst,
1729 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1732 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1735 struct net_device *dev;
/* Incoming-interface lookup; error handling for a bad index is elided. */
1736 dev = __dev_get_by_index(iif);
1745 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1747 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
/* Attach the dst so the fill path can use it; freed with the skb. */
1749 skb->dst = &rt->u.dst;
1751 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1752 err = rt6_fill_node(skb, rt,
1753 &fl.fl6_dst, &fl.fl6_src,
1755 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1756 nlh->nlmsg_seq, nlh, 0);
1762 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
/*
 * Broadcast a route change (RTM_NEWROUTE/RTM_DELROUTE event) to the
 * RTMGRP_IPV6_ROUTE multicast group. On allocation or fill failure the
 * group is notified of the error via netlink_set_err instead.
 */
1772 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1774 struct sk_buff *skb;
/* Room for the rtmsg header plus a generous 256 bytes of attributes. */
1775 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
/* gfp_any(): GFP_ATOMIC in interrupt context, GFP_KERNEL otherwise. */
1777 skb = alloc_skb(size, gfp_any());
1779 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1782 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1784 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1787 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1788 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1795 #ifdef CONFIG_PROC_FS
1797 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/*
 * /proc/net/ipv6_route formatter, called per route by fib6_clean_tree.
 * Each route prints exactly RT6_INFO_LEN bytes: dst/plen, src/plen,
 * next hop, metric, refcnt, use count, flags and device name. The
 * skip/offset arithmetic implements the legacy procfs paging protocol.
 */
1808 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1810 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
/* Skip whole entries that precede the requested file offset. */
1813 if (arg->skip < arg->offset / RT6_INFO_LEN) {
/* Stop once the caller's buffer is full. */
1818 if (arg->len >= arg->length)
/* Destination address as 32 hex digits. */
1821 for (i=0; i<16; i++) {
1822 sprintf(arg->buffer + arg->len, "%02x",
1823 rt->rt6i_dst.addr.s6_addr[i]);
1826 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1829 #ifdef CONFIG_IPV6_SUBTREES
/* With subtrees: real source prefix; without: a zero placeholder. */
1830 for (i=0; i<16; i++) {
1831 sprintf(arg->buffer + arg->len, "%02x",
1832 rt->rt6i_src.addr.s6_addr[i]);
1835 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1838 sprintf(arg->buffer + arg->len,
1839 "00000000000000000000000000000000 00 ");
/* Next hop: neighbour key if resolved, zeros otherwise. */
1843 if (rt->rt6i_nexthop) {
1844 for (i=0; i<16; i++) {
1845 sprintf(arg->buffer + arg->len, "%02x",
1846 rt->rt6i_nexthop->primary_key[i]);
1850 sprintf(arg->buffer + arg->len,
1851 "00000000000000000000000000000000");
1854 arg->len += sprintf(arg->buffer + arg->len,
1855 " %08x %08x %08x %08x %8s\n",
1856 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1857 rt->u.dst.__use, rt->rt6i_flags,
1858 rt->rt6i_dev ? rt->rt6i_dev->name : "");
/*
 * Legacy procfs read handler for /proc/net/ipv6_route: walk the whole
 * routing table under rt6_lock, letting rt6_info_route fill the buffer,
 * then adjust *start/len for the RT6_INFO_LEN-aligned paging scheme.
 */
1862 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1864 struct rt6_proc_arg arg;
1865 arg.buffer = buffer;
1866 arg.offset = offset;
1867 arg.length = length;
1871 read_lock_bh(&rt6_lock);
1872 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1873 read_unlock_bh(&rt6_lock);
/* Account for a partially-consumed entry at the requested offset. */
1877 *start += offset % RT6_INFO_LEN;
1879 arg.len -= offset % RT6_INFO_LEN;
1881 if (arg.len > length)
1889 extern struct rt6_statistics rt6_stats;
/*
 * seq_file show() for /proc/net/rt6_stats: one line of seven hex
 * counters describing fib node/route/cache population and discards.
 */
1891 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1893 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1894 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1895 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1896 rt6_stats.fib_rt_cache,
1897 atomic_read(&ip6_dst_ops.entries),
1898 rt6_stats.fib_discarded_routes);
/* open() for rt6_stats: single_open, the whole file is one show() call. */
1903 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1905 return single_open(file, rt6_stats_seq_show, NULL);
/* File operations for /proc/net/rt6_stats (seq_file single-shot pattern;
 * the .read = seq_read line is elided in this view). */
1908 static struct file_operations rt6_stats_seq_fops = {
1909 .owner = THIS_MODULE,
1910 .open = rt6_stats_seq_open,
1912 .llseek = seq_lseek,
1913 .release = single_release,
1917 #ifdef CONFIG_SYSCTL
/* Scratch integer written through the "flush" sysctl below. */
1919 static int flush_delay;
/*
 * Sysctl handler for net.ipv6.route.flush: parse the written value into
 * flush_delay and trigger an immediate garbage collection of the route
 * cache. Negative values are rejected (return path elided here).
 */
1922 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1923 void __user *buffer, size_t *lenp)
1926 proc_dointvec(ctl, write, filp, buffer, lenp);
1927 if (flush_delay < 0)
1929 fib6_run_gc((unsigned long)flush_delay);
/*
 * Sysctl table exported under /proc/sys/net/ipv6/route/. Time-valued
 * knobs use proc_dointvec_jiffies + sysctl_jiffies so userspace reads
 * and writes seconds while the kernel stores jiffies. (The .mode fields
 * and closing braces of each entry are elided in this view.)
 */
1935 ctl_table ipv6_route_table[] = {
/* Writing a value flushes the route cache via ipv6_sysctl_rtcache_flush. */
1937 .ctl_name = NET_IPV6_ROUTE_FLUSH,
1938 .procname = "flush",
1939 .data = &flush_delay,
1940 .maxlen = sizeof(int),
1942 .proc_handler = &ipv6_sysctl_rtcache_flush
1945 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
1946 .procname = "gc_thresh",
1947 .data = &ip6_dst_ops.gc_thresh,
1948 .maxlen = sizeof(int),
1950 .proc_handler = &proc_dointvec,
1953 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
1954 .procname = "max_size",
1955 .data = &ip6_rt_max_size,
1956 .maxlen = sizeof(int),
1958 .proc_handler = &proc_dointvec,
1961 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
1962 .procname = "gc_min_interval",
1963 .data = &ip6_rt_gc_min_interval,
1964 .maxlen = sizeof(int),
1966 .proc_handler = &proc_dointvec_jiffies,
1967 .strategy = &sysctl_jiffies,
1970 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
1971 .procname = "gc_timeout",
1972 .data = &ip6_rt_gc_timeout,
1973 .maxlen = sizeof(int),
1975 .proc_handler = &proc_dointvec_jiffies,
1976 .strategy = &sysctl_jiffies,
1979 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
1980 .procname = "gc_interval",
1981 .data = &ip6_rt_gc_interval,
1982 .maxlen = sizeof(int),
1984 .proc_handler = &proc_dointvec_jiffies,
1985 .strategy = &sysctl_jiffies,
1988 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
1989 .procname = "gc_elasticity",
1990 .data = &ip6_rt_gc_elasticity,
1991 .maxlen = sizeof(int),
1993 .proc_handler = &proc_dointvec_jiffies,
1994 .strategy = &sysctl_jiffies,
1997 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
1998 .procname = "mtu_expires",
1999 .data = &ip6_rt_mtu_expires,
2000 .maxlen = sizeof(int),
2002 .proc_handler = &proc_dointvec_jiffies,
2003 .strategy = &sysctl_jiffies,
2006 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2007 .procname = "min_adv_mss",
2008 .data = &ip6_rt_min_advmss,
2009 .maxlen = sizeof(int),
2011 .proc_handler = &proc_dointvec_jiffies,
2012 .strategy = &sysctl_jiffies,
/*
 * Boot-time initialization for the IPv6 routing subsystem: create the
 * rt6_info slab cache (fatal if it fails) and register the /proc files.
 */
2019 void __init ip6_route_init(void)
2021 struct proc_dir_entry *p;
2023 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2024 sizeof(struct rt6_info),
2025 0, SLAB_HWCACHE_ALIGN,
2027 if (!ip6_dst_ops.kmem_cachep)
2028 panic("cannot create ip6_dst_cache");
2031 #ifdef CONFIG_PROC_FS
/* Legacy-style /proc/net/ipv6_route plus seq_file /proc/net/rt6_stats. */
2032 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2034 p->owner = THIS_MODULE;
2036 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
/*
 * Module teardown: remove the /proc entries registered by ip6_route_init
 * and destroy the rt6_info slab cache.
 */
2043 void __exit ip6_route_cleanup(void)
2045 #ifdef CONFIG_PROC_FS
2046 proc_net_remove("ipv6_route");
2047 proc_net_remove("rt6_stats");
2054 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);