2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
53 #include <linux/rtnetlink.h>
57 #include <asm/uaccess.h>
60 #include <linux/sysctl.h>
63 /* Set to 3 to get tracing. */
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
71 #define RT6_TRACE(x...) do { ; } while (0)
/*
 * Tunables for the IPv6 routing/dst cache.  Values combined with HZ are
 * in jiffies.  ip6_rt_gc_interval is intentionally non-static — it is
 * referenced outside this file (presumably by the fib6 GC timer; confirm
 * against callers).
 */
75 static int ip6_rt_max_size = 4096;
76 static int ip6_rt_gc_min_interval = HZ / 2;
77 static int ip6_rt_gc_timeout = 60*HZ;
78 int ip6_rt_gc_interval = 30*HZ;
79 static int ip6_rt_gc_elasticity = 9;
80 static int ip6_rt_mtu_expires = 10*60*HZ;
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
/* Forward declarations for the dst_ops callbacks and helpers below. */
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static int ip6_dst_gc(void);
88 static int ip6_pkt_discard(struct sk_buff *skb);
89 static void ip6_link_failure(struct sk_buff *skb);
90 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
/*
 * dst_ops vector wiring this file's cache-validation, GC, link-failure
 * and PMTU-update callbacks into the generic dst layer for ETH_P_IPV6.
 * NOTE(review): some initializer fields are elided in this excerpt.
 */
92 static struct dst_ops ip6_dst_ops = {
94 .protocol = __constant_htons(ETH_P_IPV6),
97 .check = ip6_dst_check,
98 .negative_advice = ip6_negative_advice,
99 .link_failure = ip6_link_failure,
100 .update_pmtu = ip6_rt_update_pmtu,
101 .entry_size = sizeof(struct rt6_info),
/*
 * The "null route": returned whenever no real route matches.  It is a
 * permanent reject entry (-ENETUNREACH, RTF_REJECT|RTF_NONEXTHOP) bound
 * to the loopback device, with both input and output set to
 * ip6_pkt_discard and refcounts pre-initialised so it is never freed.
 */
104 struct rt6_info ip6_null_entry = {
107 .__refcnt = ATOMIC_INIT(1),
109 .dev = &loopback_dev,
111 .error = -ENETUNREACH,
112 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
113 .input = ip6_pkt_discard,
114 .output = ip6_pkt_discard,
116 .path = (struct dst_entry*)&ip6_null_entry,
119 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
120 .rt6i_metric = ~(u32) 0,
121 .rt6i_ref = ATOMIC_INIT(1),
/*
 * Root of the IPv6 FIB radix tree; its leaf starts as the null route so
 * lookups always return something.
 */
124 struct fib6_node ip6_routing_table = {
125 .leaf = &ip6_null_entry,
126 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
129 /* Protects all the ip6 fib */
131 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
134 /* allocate dst with ip6_dst_ops */
/* Thin wrapper: allocate a zeroed rt6_info via the generic dst allocator. */
135 static __inline__ struct rt6_info *ip6_dst_alloc(void)
137 return dst_alloc(&ip6_dst_ops);
141 * Route lookup. Any rt6_lock is implied.
/*
 * Walk the leaf chain starting at @rt and pick the entry whose device
 * matches @oif; loopback devices are remembered separately (as a "local"
 * candidate — the selection lines are elided in this excerpt).  Falls
 * back to ip6_null_entry when nothing matches.  Caller holds rt6_lock.
 */
144 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
148 struct rt6_info *local = NULL;
149 struct rt6_info *sprt;
152 for (sprt = rt; sprt; sprt = sprt->u.next) {
153 struct net_device *dev = sprt->rt6i_dev;
154 if (dev->ifindex == oif)
156 if (dev->flags&IFF_LOOPBACK)
164 return &ip6_null_entry;
170 * pointer to the last default router chosen. BH is disabled locally.
/* Sticky pointer to the last chosen default router, and its lock. */
172 static struct rt6_info *rt6_dflt_pointer;
173 static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
175 /* Default Router Selection (RFC 2461 6.3.6) */
/*
 * Choose the best default router from the chain at @rt for interface
 * @oif.  Routers are ranked by neighbour NUD state (scoring lines are
 * elided here); a (probably) reachable router already recorded in
 * rt6_dflt_pointer is preferred to keep PMTU state stable.  If none is
 * reachable, round-robin from rt6_dflt_pointer over live entries
 * (obsolete <= 0, error == 0); as a last resort scan the table root for
 * any RTF_DEFAULT route, else return ip6_null_entry.
 */
176 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
178 struct rt6_info *match = NULL;
179 struct rt6_info *sprt;
182 for (sprt = rt; sprt; sprt = sprt->u.next) {
183 struct neighbour *neigh;
188 sprt->rt6i_dev->ifindex == oif))
191 if (sprt == rt6_dflt_pointer)
194 if ((neigh = sprt->rt6i_nexthop) != NULL) {
195 read_lock_bh(&neigh->lock);
196 switch (neigh->nud_state) {
214 read_unlock_bh(&neigh->lock);
217 read_unlock_bh(&neigh->lock);
222 if (m > mpri || m >= 12) {
226 /* we choose the last default router if it
227 * is in (probably) reachable state.
228 * If route changed, we should do pmtu
229 * discovery. --yoshfuji
236 spin_lock(&rt6_dflt_lock);
239 * No default routers are known to be reachable.
242 if (rt6_dflt_pointer) {
243 for (sprt = rt6_dflt_pointer->u.next;
244 sprt; sprt = sprt->u.next) {
245 if (sprt->u.dst.obsolete <= 0 &&
246 sprt->u.dst.error == 0) {
253 sprt = sprt->u.next) {
254 if (sprt->u.dst.obsolete <= 0 &&
255 sprt->u.dst.error == 0) {
259 if (sprt == rt6_dflt_pointer)
266 if (rt6_dflt_pointer != match)
267 RT6_TRACE("changed default router: %p->%p\n",
268 rt6_dflt_pointer, match);
269 rt6_dflt_pointer = match;
271 spin_unlock(&rt6_dflt_lock);
275 * Last Resort: if no default routers found,
276 * use addrconf default route.
277 * We don't record this route.
279 for (sprt = ip6_routing_table.leaf;
280 sprt; sprt = sprt->u.next) {
281 if ((sprt->rt6i_flags & RTF_DEFAULT) &&
284 sprt->rt6i_dev->ifindex == oif))) {
290 /* no default route. give up. */
291 match = &ip6_null_entry;
/*
 * Public route lookup: find the fib node for @daddr/@saddr under
 * rt6_lock, match the outgoing interface, and return the route with a
 * reference held.  Routes with a dst error are released (the NULL-return
 * path is elided in this excerpt).
 */
298 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
301 struct fib6_node *fn;
304 read_lock_bh(&rt6_lock);
305 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
306 rt = rt6_device_match(fn->leaf, oif, strict);
307 dst_hold(&rt->u.dst);
309 read_unlock_bh(&rt6_lock);
311 rt->u.dst.lastuse = jiffies;
312 if (rt->u.dst.error == 0)
314 dst_release(&rt->u.dst);
318 /* rt6_ins is called with FREE rt6_lock.
319 It takes new route entry, the addition fails by any reason the
320 route is freed. In any case, if caller does not hold it, it may
/* Insert @rt into the FIB under the write lock; ownership transfers in. */
324 static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
328 write_lock_bh(&rt6_lock);
329 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
330 write_unlock_bh(&rt6_lock);
335 /* No rt6_lock! If COW failed, the function returns dead route entry
336 with dst->error set to errno value.
/*
 * Clone-on-write: copy @ort into a host route (/128) for @daddr, mark it
 * RTF_CACHE, resolve its neighbour, and insert it.  On insertion failure
 * the error is stored in dst->error; on allocation failure a held
 * reference to ip6_null_entry is returned instead of NULL.
 */
339 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
340 struct in6_addr *saddr)
349 rt = ip6_rt_copy(ort);
352 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
/* Non-gateway (on-link) routes: the destination itself is the nexthop. */
354 if (!(rt->rt6i_flags&RTF_GATEWAY))
355 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
357 rt->rt6i_dst.plen = 128;
358 rt->rt6i_flags |= RTF_CACHE;
359 rt->u.dst.flags |= DST_HOST;
361 #ifdef CONFIG_IPV6_SUBTREES
362 if (rt->rt6i_src.plen && saddr) {
363 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
364 rt->rt6i_src.plen = 128;
368 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
370 dst_hold(&rt->u.dst);
372 err = rt6_ins(rt, NULL, NULL);
376 rt->u.dst.error = err;
380 dst_hold(&ip6_null_entry.u.dst);
381 return &ip6_null_entry;
/*
 * In strict mode, when the lookup fell through to ip6_null_entry, climb
 * the fib tree towards the root retrying each node that carries route
 * info (RTN_RTINFO).  Expanded inside ip6_route_input/ip6_route_output.
 */
384 #define BACKTRACK() \
385 if (rt == &ip6_null_entry && strict) { \
386 while ((fn = fn->parent) != NULL) { \
387 if (fn->fn_flags & RTN_ROOT) { \
388 dst_hold(&rt->u.dst); \
391 if (fn->fn_flags & RTN_RTINFO) \
/*
 * Input-path routing: look up skb's destination, COW a cache entry when
 * the matched route has no nexthop yet, and attach the result to
 * skb->dst with a reference held.  Strict device matching is used for
 * multicast/link-local destinations.
 */
397 void ip6_route_input(struct sk_buff *skb)
399 struct fib6_node *fn;
404 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
407 read_lock_bh(&rt6_lock);
409 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
410 &skb->nh.ipv6h->saddr);
415 if ((rt->rt6i_flags & RTF_CACHE)) {
416 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
418 dst_hold(&rt->u.dst);
422 rt = rt6_device_match(rt, skb->dev->ifindex, 0);
/* No nexthop resolved yet: clone to a per-destination cache entry. */
425 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
426 read_unlock_bh(&rt6_lock);
428 rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
429 &skb->nh.ipv6h->saddr);
431 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
433 /* Race condition! In the gap, when rt6_lock was
434 released someone could insert this route. Relookup.
436 dst_release(&rt->u.dst);
439 dst_hold(&rt->u.dst);
442 read_unlock_bh(&rt6_lock);
444 rt->u.dst.lastuse = jiffies;
446 skb->dst = (struct dst_entry *) rt;
/*
 * Output-path routing: like ip6_route_input but keyed on a flowi.  For
 * default routes above the addrconf priority it runs RFC 2461 default
 * router selection (rt6_best_dflt); otherwise plain device matching.
 * Returns the dst with a reference held.
 */
449 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
451 struct fib6_node *fn;
456 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
459 read_lock_bh(&rt6_lock);
461 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
466 if ((rt->rt6i_flags & RTF_CACHE)) {
467 rt = rt6_device_match(rt, fl->oif, strict);
469 dst_hold(&rt->u.dst);
472 if (rt->rt6i_flags & RTF_DEFAULT) {
473 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
474 rt = rt6_best_dflt(rt, fl->oif);
476 rt = rt6_device_match(rt, fl->oif, strict);
/* No nexthop resolved yet: clone to a per-destination cache entry. */
480 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
481 read_unlock_bh(&rt6_lock);
483 rt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
485 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
488 /* Race condition! In the gap, when rt6_lock was
489 released someone could insert this route. Relookup.
491 dst_release(&rt->u.dst);
494 dst_hold(&rt->u.dst);
497 read_unlock_bh(&rt6_lock);
499 rt->u.dst.lastuse = jiffies;
506 * Destination cache support functions
/*
 * dst_ops->check: a cached route stays valid while its fib node's serial
 * number still equals the cookie recorded at lookup time.
 */
509 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
513 rt = (struct rt6_info *) dst;
515 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/*
 * dst_ops->negative_advice: drop an unusable cached (RTF_CACHE) clone
 * from the FIB so the next lookup produces a fresh entry.
 */
522 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
524 struct rt6_info *rt = (struct rt6_info *) dst;
527 if (rt->rt6i_flags & RTF_CACHE)
528 ip6_del_rt(rt, NULL, NULL);
/*
 * dst_ops->link_failure: report unreachability to the sender, then
 * expire the cached clone immediately or, for default routes, bump the
 * fib node serial so cached lookups are invalidated.
 */
535 static void ip6_link_failure(struct sk_buff *skb)
539 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
541 rt = (struct rt6_info *) skb->dst;
543 if (rt->rt6i_flags&RTF_CACHE) {
544 dst_set_expires(&rt->u.dst, 0);
545 rt->rt6i_flags |= RTF_EXPIRES;
546 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
547 rt->rt6i_node->fn_sernum = -1;
/*
 * dst_ops->update_pmtu: lower the cached MTU on host (/128) routes only,
 * never raising it and clamping (clamp line elided) at IPV6_MIN_MTU.
 */
551 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
553 struct rt6_info *rt6 = (struct rt6_info*)dst;
555 if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
556 rt6->rt6i_flags |= RTF_MODIFIED;
557 if (mtu < IPV6_MIN_MTU)
559 dst->metrics[RTAX_MTU-1] = mtu;
563 /* Protected by rt6_lock. */
/* Singly-linked list of NDISC dsts awaiting garbage collection. */
564 static struct dst_entry *ndisc_dst_gc_list;
/*
 * Allocate a local dst for neighbour-discovery traffic: resolves @neigh
 * from @addr if not supplied, links the entry onto ndisc_dst_gc_list
 * (under rt6_lock) and kicks the fib GC so it is eventually reclaimed.
 */
566 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
567 struct neighbour *neigh,
568 struct in6_addr *addr,
569 int (*output)(struct sk_buff *))
571 struct rt6_info *rt = ip6_dst_alloc();
573 if (unlikely(rt == NULL))
581 neigh = ndisc_get_neigh(dev, addr);
584 rt->rt6i_nexthop = neigh;
585 rt->rt6i_expires = 0;
586 rt->rt6i_flags = RTF_LOCAL;
588 atomic_set(&rt->u.dst.__refcnt, 1);
589 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
590 rt->u.dst.output = output;
592 write_lock_bh(&rt6_lock);
593 rt->u.dst.next = ndisc_dst_gc_list;
594 ndisc_dst_gc_list = &rt->u.dst;
595 write_unlock_bh(&rt6_lock);
597 fib6_force_start_gc();
600 return (struct dst_entry *)rt;
/*
 * Reap unreferenced entries from ndisc_dst_gc_list.  Only the list-walk
 * head is visible in this excerpt; the unlink/free and the meaning of
 * @more (presumably "entries remaining") are elided — confirm upstream.
 */
603 int ndisc_dst_gc(int *more)
605 struct dst_entry *dst, *next, **pprev;
609 pprev = &ndisc_dst_gc_list;
611 while ((dst = *pprev) != NULL) {
612 if (!atomic_read(&dst->__refcnt)) {
/*
 * dst_ops->gc: rate-limited by ip6_rt_gc_min_interval unless the cache
 * exceeds ip6_rt_max_size.  The adaptive 'expire' window shrinks by
 * 1/2^elasticity each pass and resets when the cache is under the
 * threshold.  Returns nonzero when the cache is still over the limit.
 */
625 static int ip6_dst_gc(void)
627 static unsigned expire = 30*HZ;
628 static unsigned long last_gc;
629 unsigned long now = jiffies;
631 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
632 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
638 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
639 expire = ip6_rt_gc_timeout>>1;
642 expire -= expire>>ip6_rt_gc_elasticity;
643 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
646 /* Clean host part of a prefix. Not necessary in radix tree,
647 but results in cleaner routing tables.
649 Remove it only when all the things will work!
/*
 * Return the device's IPv6 MTU (cnf.mtu6), falling back to IPV6_MIN_MTU
 * when the device has no inet6_dev.  (The in6_dev_put is elided here.)
 */
652 static int ipv6_get_mtu(struct net_device *dev)
654 int mtu = IPV6_MIN_MTU;
655 struct inet6_dev *idev;
657 idev = in6_dev_get(dev);
659 mtu = idev->cnf.mtu6;
/*
 * Derive the advertised TCP MSS from a path MTU: subtract the IPv6 and
 * TCP headers, clamp below at ip6_rt_min_advmss and above at the
 * jumbogram sentinel described in the comment below.
 */
665 static inline unsigned int ipv6_advmss(unsigned int mtu)
667 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
669 if (mtu < ip6_rt_min_advmss)
670 mtu = ip6_rt_min_advmss;
673 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
674 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
675 * IPV6_MAXPLEN is also valid and means: "any MSS,
676 * rely only on pmtu discovery"
678 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/*
 * Per-device hop limit (cnf.hop_limit), defaulting to the global
 * ipv6_devconf value when the device has no inet6_dev.
 */
683 static int ipv6_get_hoplimit(struct net_device *dev)
685 int hoplimit = ipv6_devconf.hop_limit;
686 struct inet6_dev *idev;
688 idev = in6_dev_get(dev);
690 hoplimit = idev->cnf.hop_limit;
/*
 * Add a route described by @rtmsg (ioctl or rtnetlink path; @nlh/@_rtattr
 * are the optional netlink message and attribute vector).  Validates
 * prefix lengths, resolves the egress device and gateway, applies
 * RTA_METRICS attributes and fills default hoplimit/MTU/advmss metrics,
 * then inserts via rt6_ins.  On error paths the half-built rt is freed
 * with dst_free.
 */
700 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
706 struct net_device *dev = NULL;
709 rta = (struct rtattr **) _rtattr;
711 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
713 #ifndef CONFIG_IPV6_SUBTREES
714 if (rtmsg->rtmsg_src_len)
717 if (rtmsg->rtmsg_metric == 0)
718 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
720 rt = ip6_dst_alloc();
725 rt->u.dst.obsolete = -1;
726 rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
727 if (nlh && (r = NLMSG_DATA(nlh))) {
728 rt->rt6i_protocol = r->rtm_protocol;
730 rt->rt6i_protocol = RTPROT_BOOT;
733 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
735 if (addr_type & IPV6_ADDR_MULTICAST)
736 rt->u.dst.input = ip6_mc_input;
738 rt->u.dst.input = ip6_forward;
740 rt->u.dst.output = ip6_output;
742 if (rtmsg->rtmsg_ifindex) {
743 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
749 ipv6_addr_prefix(&rt->rt6i_dst.addr,
750 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
751 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
752 if (rt->rt6i_dst.plen == 128)
753 rt->u.dst.flags = DST_HOST;
755 #ifdef CONFIG_IPV6_SUBTREES
756 ipv6_addr_prefix(&rt->rt6i_src.addr,
757 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
758 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
761 rt->rt6i_metric = rtmsg->rtmsg_metric;
763 /* We cannot add true routes via loopback here,
764 they would result in kernel looping; promote them to reject routes
766 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
767 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
772 rt->u.dst.output = ip6_pkt_discard;
773 rt->u.dst.input = ip6_pkt_discard;
774 rt->u.dst.error = -ENETUNREACH;
775 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
779 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
780 struct in6_addr *gw_addr;
783 gw_addr = &rtmsg->rtmsg_gateway;
784 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
785 gwa_type = ipv6_addr_type(gw_addr);
787 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
788 struct rt6_info *grt;
790 /* IPv6 strictly inhibits using not link-local
791 addresses as nexthop address.
792 Otherwise, router will not able to send redirects.
793 It is very good, but in some (rare!) circumstances
794 (SIT, PtP, NBMA NOARP links) it is handy to allow
795 some exceptions. --ANK
798 if (!(gwa_type&IPV6_ADDR_UNICAST))
/* Non-link-local gateway must itself be reachable via an on-link route. */
801 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
807 if (dev != grt->rt6i_dev) {
808 dst_release(&grt->u.dst);
815 if (!(grt->rt6i_flags&RTF_GATEWAY))
817 dst_release(&grt->u.dst);
823 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
831 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
832 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
833 if (IS_ERR(rt->rt6i_nexthop)) {
834 err = PTR_ERR(rt->rt6i_nexthop);
835 rt->rt6i_nexthop = NULL;
840 rt->rt6i_flags = rtmsg->rtmsg_flags;
/* Apply caller-supplied RTAX_* metrics from the RTA_METRICS attribute. */
843 if (rta && rta[RTA_METRICS-1]) {
844 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
845 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
847 while (RTA_OK(attr, attrlen)) {
848 unsigned flavor = attr->rta_type;
850 if (flavor > RTAX_MAX) {
854 rt->u.dst.metrics[flavor-1] =
855 *(u32 *)RTA_DATA(attr);
857 attr = RTA_NEXT(attr, attrlen);
861 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
862 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
863 rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
864 IPV6_DEFAULT_MCASTHOPS;
866 rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
867 ipv6_get_hoplimit(dev);
870 if (!rt->u.dst.metrics[RTAX_MTU-1])
871 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
872 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
873 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
875 return rt6_ins(rt, nlh, _rtattr);
880 dst_free((struct dst_entry *) rt);
/*
 * Remove @rt from the FIB under the write lock.  The sticky default-
 * router pointer is cleared first so a deleted route can never be
 * re-selected; the caller's reference is dropped before fib6_del.
 */
884 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
888 write_lock_bh(&rt6_lock);
890 spin_lock_bh(&rt6_dflt_lock);
891 rt6_dflt_pointer = NULL;
892 spin_unlock_bh(&rt6_dflt_lock);
894 dst_release(&rt->u.dst);
896 err = fib6_del(rt, nlh, _rtattr);
897 write_unlock_bh(&rt6_lock);
/*
 * Delete the route matching @rtmsg: locate the fib node for the dst/src
 * prefixes, then scan its leaf chain filtering by ifindex, gateway and
 * metric (each filter applies only when the caller specified it).  The
 * match is deleted via ip6_del_rt with the read lock already dropped.
 */
902 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
904 struct fib6_node *fn;
908 read_lock_bh(&rt6_lock);
910 fn = fib6_locate(&ip6_routing_table,
911 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
912 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
915 for (rt = fn->leaf; rt; rt = rt->u.next) {
916 if (rtmsg->rtmsg_ifindex &&
917 (rt->rt6i_dev == NULL ||
918 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
920 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
921 ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
923 if (rtmsg->rtmsg_metric &&
924 rtmsg->rtmsg_metric != rt->rt6i_metric)
926 dst_hold(&rt->u.dst);
927 read_unlock_bh(&rt6_lock);
929 return ip6_del_rt(rt, nlh, _rtattr);
932 read_unlock_bh(&rt6_lock);
/*
 * Handle an NDISC redirect for @dest received from @saddr via @neigh.
 * Validates the redirect (same device, not a duplicate, old route is a
 * gateway route, sender is the current nexthop — or some default
 * router's nexthop), then installs a cloned RTF_DYNAMIC|RTF_CACHE host
 * route pointing at the new nexthop and removes the old cache entry.
 */
940 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
941 struct neighbour *neigh, int on_link)
943 struct rt6_info *rt, *nrt;
945 /* Locate old route to this destination. */
946 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
951 if (neigh->dev != rt->rt6i_dev)
954 /* Redirect received -> path was valid.
955 Look, redirects are sent only in response to data packets,
956 so that this nexthop apparently is reachable. --ANK
958 dst_confirm(&rt->u.dst);
960 /* Duplicate redirect: silently ignore. */
961 if (neigh == rt->u.dst.neighbour)
964 /* Current route is on-link; redirect is always invalid.
966 Seems, previous statement is not true. It could
967 be node, which looks for us as on-link (f.e. proxy ndisc)
968 But then router serving it might decide, that we should
969 know truth 8)8) --ANK (980726).
971 if (!(rt->rt6i_flags&RTF_GATEWAY))
975 * RFC 2461 specifies that redirects should only be
976 * accepted if they come from the nexthop to the target.
977 * Due to the way default routers are chosen, this notion
978 * is a bit fuzzy and one might need to check all default
982 if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
983 if (rt->rt6i_flags & RTF_DEFAULT) {
984 struct rt6_info *rt1;
986 read_lock(&rt6_lock);
987 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
988 if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
989 dst_hold(&rt1->u.dst);
990 dst_release(&rt->u.dst);
991 read_unlock(&rt6_lock);
996 read_unlock(&rt6_lock);
999 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1000 "for redirect target\n");
1007 * We have finally decided to accept it.
1010 nrt = ip6_rt_copy(rt);
1014 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1016 nrt->rt6i_flags &= ~RTF_GATEWAY;
1018 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1019 nrt->rt6i_dst.plen = 128;
1020 nrt->u.dst.flags |= DST_HOST;
1022 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1023 nrt->rt6i_nexthop = neigh_clone(neigh);
1024 /* Reset pmtu, it may be better */
1025 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1026 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
1028 if (rt6_ins(nrt, NULL, NULL))
1031 if (rt->rt6i_flags&RTF_CACHE) {
1032 ip6_del_rt(rt, NULL, NULL);
1037 dst_release(&rt->u.dst);
1042 * Handle ICMP "packet too big" messages
1043 * i.e. Path MTU discovery
/*
 * Record a reduced path MTU for @daddr learned from an ICMPv6 Packet
 * Too Big.  MTUs below IPV6_MIN_MTU are clamped up per RFC 1981.  An
 * existing RTF_CACHE host route is updated in place with a 10-minute
 * expiry; otherwise a host clone is created (COW for connected routes,
 * plain copy for gatewayed/NONEXTHOP ones) carrying the new MTU.
 */
1046 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1047 struct net_device *dev, u32 pmtu)
1049 struct rt6_info *rt, *nrt;
1051 if (pmtu < IPV6_MIN_MTU) {
1052 if (net_ratelimit())
1053 printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1055 /* According to RFC1981, the PMTU is set to the IPv6 minimum
1056 link MTU if the node receives a Packet Too Big message
1057 reporting next-hop MTU that is less than the IPv6 minimum MTU.
1059 pmtu = IPV6_MIN_MTU;
1062 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1067 if (pmtu >= dst_pmtu(&rt->u.dst))
1070 /* New mtu received -> path was valid.
1071 They are sent only in response to data packets,
1072 so that this nexthop apparently is reachable. --ANK
1074 dst_confirm(&rt->u.dst);
1076 /* Host route. If it is static, it would be better
1077 not to override it, but add new one, so that
1078 when cache entry will expire old pmtu
1079 would return automatically.
1081 if (rt->rt6i_flags & RTF_CACHE) {
1082 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1083 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1084 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1089 Two cases are possible:
1090 1. It is connected route. Action: COW
1091 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1093 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1094 nrt = rt6_cow(rt, daddr, saddr);
1095 if (!nrt->u.dst.error) {
1096 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1097 /* According to RFC 1981, detecting PMTU increase shouldn't be
1098 happened within 5 mins, the recommended timer is 10 mins.
1099 Here this route expiration time is set to ip6_rt_mtu_expires
1100 which is 10 mins. After 10 mins the decreased pmtu is expired
1101 and detecting PMTU increase will be automatically happened.
1103 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1104 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1106 dst_release(&nrt->u.dst);
1108 nrt = ip6_rt_copy(rt);
1111 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1112 nrt->rt6i_dst.plen = 128;
1113 nrt->u.dst.flags |= DST_HOST;
1114 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1115 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1116 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1117 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1118 rt6_ins(nrt, NULL, NULL);
1122 dst_release(&rt->u.dst);
1126 * Misc support functions
/*
 * Shallow-copy @ort into a fresh rt6_info: handlers, metrics, device
 * (with a new reference), gateway and keys are duplicated; RTF_EXPIRES
 * is cleared and the metric reset to 0 since the copy starts a new life.
 */
1129 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1131 struct rt6_info *rt = ip6_dst_alloc();
1134 rt->u.dst.input = ort->u.dst.input;
1135 rt->u.dst.output = ort->u.dst.output;
1137 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1138 rt->u.dst.dev = ort->u.dst.dev;
1140 dev_hold(rt->u.dst.dev);
1141 rt->u.dst.lastuse = jiffies;
1142 rt->rt6i_expires = 0;
1144 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1145 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1146 rt->rt6i_metric = 0;
1148 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1149 #ifdef CONFIG_IPV6_SUBTREES
1150 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
/*
 * Find the default-router entry for gateway @addr on @dev by scanning
 * the root node's leaf chain; returns it with a reference held (or NULL
 * — the miss path is elided in this excerpt).
 */
1156 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1158 struct rt6_info *rt;
1159 struct fib6_node *fn;
1161 fn = &ip6_routing_table;
1163 write_lock_bh(&rt6_lock);
1164 for (rt = fn->leaf; rt; rt=rt->u.next) {
1165 if (dev == rt->rt6i_dev &&
1166 ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1170 dst_hold(&rt->u.dst);
1171 write_unlock_bh(&rt6_lock);
/*
 * Install a default route via @gwaddr on @dev (metric 1024, flags
 * RTF_GATEWAY|RTF_ADDRCONF|RTF_DEFAULT|RTF_UP) and return the freshly
 * inserted entry via rt6_get_dflt_router.
 */
1175 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1176 struct net_device *dev)
1178 struct in6_rtmsg rtmsg;
1180 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1181 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1182 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1183 rtmsg.rtmsg_metric = 1024;
1184 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1186 rtmsg.rtmsg_ifindex = dev->ifindex;
1188 ip6_route_add(&rtmsg, NULL, NULL);
1189 return rt6_get_dflt_router(gwaddr, dev);
/*
 * Delete all default routes matching the flag set selected by
 * @last_resort (RTF_ALLONLINK vs RTF_DEFAULT|RTF_ADDRCONF).  For each
 * match the read lock is dropped, the sticky default pointer cleared
 * and ip6_del_rt called; the restart of the scan is elided here —
 * presumably it loops back under the lock, confirm upstream.
 */
1192 void rt6_purge_dflt_routers(int last_resort)
1194 struct rt6_info *rt;
1198 flags = RTF_ALLONLINK;
1200 flags = RTF_DEFAULT | RTF_ADDRCONF;
1203 read_lock_bh(&rt6_lock);
1204 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1205 if (rt->rt6i_flags & flags) {
1206 dst_hold(&rt->u.dst);
1208 spin_lock_bh(&rt6_dflt_lock);
1209 rt6_dflt_pointer = NULL;
1210 spin_unlock_bh(&rt6_dflt_lock);
1212 read_unlock_bh(&rt6_lock);
1214 ip6_del_rt(rt, NULL, NULL);
1219 read_unlock_bh(&rt6_lock);
/*
 * SIOCADDRT/SIOCDELRT ioctl entry point: requires CAP_NET_ADMIN, copies
 * the in6_rtmsg from userspace, then dispatches to ip6_route_add or
 * ip6_route_del.
 */
1222 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1224 struct in6_rtmsg rtmsg;
1228 case SIOCADDRT: /* Add a route */
1229 case SIOCDELRT: /* Delete a route */
1230 if (!capable(CAP_NET_ADMIN))
1232 err = copy_from_user(&rtmsg, arg,
1233 sizeof(struct in6_rtmsg));
1240 err = ip6_route_add(&rtmsg, NULL, NULL);
1243 err = ip6_route_del(&rtmsg, NULL, NULL);
1257 * Drop the packet on the floor
/*
 * input/output handler for reject and null routes: count the no-route,
 * notify the sender with ICMPv6 destination-unreachable, drop the skb.
 */
1260 int ip6_pkt_discard(struct sk_buff *skb)
1262 IP6_INC_STATS(Ip6OutNoRoutes);
1263 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
/*
 * Install the host (/128) route for a local address @addr: bound to the
 * loopback device, RTF_UP|RTF_NONEXTHOP (plus RTF_LOCAL unless @anycast
 * — the branch line is elided here), with device-derived metrics.  If
 * neighbour resolution fails the half-built dst is freed.
 */
1272 int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast)
1274 struct rt6_info *rt = ip6_dst_alloc();
1279 dev_hold(&loopback_dev);
1281 rt->u.dst.flags = DST_HOST;
1282 rt->u.dst.input = ip6_input;
1283 rt->u.dst.output = ip6_output;
1284 rt->rt6i_dev = &loopback_dev;
1285 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1286 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
1287 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
1288 rt->u.dst.obsolete = -1;
1290 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1292 rt->rt6i_flags |= RTF_LOCAL;
1293 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1294 if (rt->rt6i_nexthop == NULL) {
1295 dst_free((struct dst_entry *) rt);
1299 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1300 rt->rt6i_dst.plen = 128;
1301 rt6_ins(rt, NULL, NULL);
1306 /* Delete address. Warning: you should check that this address
1307 disappeared before calling this function.
/*
 * Remove the loopback host route for @addr; only exact /128 matches are
 * deleted, and the lookup reference is dropped afterwards.
 */
1310 int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
1312 struct rt6_info *rt;
1315 rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
1317 if (rt->rt6i_dst.plen == 128)
1318 err = ip6_del_rt(rt, NULL, NULL);
1320 dst_release(&rt->u.dst);
/*
 * fib6_clean_tree callback: select every route on the downed device
 * (@arg, or all devices when @arg is NULL) except the null entry.
 */
1326 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1328 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1329 rt != &ip6_null_entry) {
1330 RT6_TRACE("deleted by ifdown %p\n", rt);
/* Purge all routes referencing @dev; called when an interface goes down. */
1336 void rt6_ifdown(struct net_device *dev)
1338 write_lock_bh(&rt6_lock);
1339 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1340 write_unlock_bh(&rt6_lock);
/* Walker argument: the device whose MTU changed (new value elided here). */
1343 struct rt6_mtu_change_arg
1345 struct net_device *dev;
/*
 * fib6_clean_tree callback for device MTU changes: update the route's
 * RTAX_MTU (unless locked) when the new MTU is a decrease, or when it is
 * an increase and the old route PMTU equalled the device MTU (i.e. the
 * device was the path bottleneck).  ADVMSS is recomputed to match.
 */
1349 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1351 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1352 struct inet6_dev *idev;
1354 /* In IPv6 pmtu discovery is not optional,
1355 so that RTAX_MTU lock cannot disable it.
1356 We still use this lock to block changes
1357 caused by addrconf/ndisc.
1360 idev = __in6_dev_get(arg->dev);
1364 /* For administrative MTU increase, there is no way to discover
1365 IPv6 PMTU increase, so PMTU increase should be updated here.
1366 Since RFC 1981 doesn't include administrative MTU increase
1367 update PMTU increase is a MUST. (i.e. jumbo frame)
1370 If new MTU is less than route PMTU, this new MTU will be the
1371 lowest MTU in the path, update the route PMTU to reflect PMTU
1372 decreases; if new MTU is greater than route PMTU, and the
1373 old MTU is the lowest MTU in the path, update the route PMTU
1374 to reflect the increase. In this case if the other nodes' MTU
1375 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1378 if (rt->rt6i_dev == arg->dev &&
1379 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1380 (dst_pmtu(&rt->u.dst) > arg->mtu ||
1381 (dst_pmtu(&rt->u.dst) < arg->mtu &&
1382 dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
1383 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1384 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
/* Propagate a device MTU change to every affected route in the FIB. */
1388 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1390 struct rt6_mtu_change_arg arg;
1394 read_lock_bh(&rt6_lock);
1395 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1396 read_unlock_bh(&rt6_lock);
/*
 * Translate an rtnetlink rtmsg + attribute vector into the legacy
 * in6_rtmsg used by ip6_route_add/del.  Attribute lengths are validated
 * before copying; RTN_UNREACHABLE maps to RTF_REJECT and a gateway
 * attribute sets RTF_GATEWAY.
 */
1399 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1400 struct in6_rtmsg *rtmsg)
1402 memset(rtmsg, 0, sizeof(*rtmsg));
1404 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1405 rtmsg->rtmsg_src_len = r->rtm_src_len;
1406 rtmsg->rtmsg_flags = RTF_UP;
1407 if (r->rtm_type == RTN_UNREACHABLE)
1408 rtmsg->rtmsg_flags |= RTF_REJECT;
1410 if (rta[RTA_GATEWAY-1]) {
1411 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1413 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1414 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1416 if (rta[RTA_DST-1]) {
1417 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1419 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1421 if (rta[RTA_SRC-1]) {
1422 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1424 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1426 if (rta[RTA_OIF-1]) {
1427 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1429 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1431 if (rta[RTA_PRIORITY-1]) {
1432 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1434 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
/* rtnetlink RTM_DELROUTE handler: convert the message, then delete. */
1439 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1441 struct rtmsg *r = NLMSG_DATA(nlh);
1442 struct in6_rtmsg rtmsg;
1444 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1446 return ip6_route_del(&rtmsg, nlh, arg);
/* rtnetlink RTM_NEWROUTE handler: convert the message, then add. */
1449 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1451 struct rtmsg *r = NLMSG_DATA(nlh);
1452 struct in6_rtmsg rtmsg;
1454 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1456 return ip6_route_add(&rtmsg, nlh, arg);
/* State threaded through the fib walker while dumping routes to netlink. */
1459 struct rt6_rtnl_dump_arg
1461 struct sk_buff *skb;
1462 struct netlink_callback *cb;
/*
 * Serialise one route into an RTM_* netlink message in @skb.  When
 * @prefix is set only RTF_PREFIX_RT routes are emitted (others succeed
 * silently).  Route flags map to rtm_type/rtm_protocol; optional
 * attributes (DST/SRC/IIF/PREFSRC/metrics/GATEWAY/OIF/PRIORITY/
 * CACHEINFO) follow.  On overflow the partial message is trimmed away
 * (the failure-return lines are elided in this excerpt).
 */
1465 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1466 struct in6_addr *dst,
1467 struct in6_addr *src,
1469 int type, u32 pid, u32 seq,
1470 struct nlmsghdr *in_nlh, int prefix)
1473 struct nlmsghdr *nlh;
1474 unsigned char *b = skb->tail;
1475 struct rta_cacheinfo ci;
1477 if (prefix) { /* user wants prefix routes only */
1478 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1479 /* success since this is not a prefix route */
1484 if (!pid && in_nlh) {
1485 pid = in_nlh->nlmsg_pid;
1488 nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1489 rtm = NLMSG_DATA(nlh);
1490 rtm->rtm_family = AF_INET6;
1491 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1492 rtm->rtm_src_len = rt->rt6i_src.plen;
1494 rtm->rtm_table = RT_TABLE_MAIN;
1495 if (rt->rt6i_flags&RTF_REJECT)
1496 rtm->rtm_type = RTN_UNREACHABLE;
1497 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1498 rtm->rtm_type = RTN_LOCAL;
1500 rtm->rtm_type = RTN_UNICAST;
1502 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1503 rtm->rtm_protocol = rt->rt6i_protocol;
1504 if (rt->rt6i_flags&RTF_DYNAMIC)
1505 rtm->rtm_protocol = RTPROT_REDIRECT;
1506 else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1507 rtm->rtm_protocol = RTPROT_KERNEL;
1508 else if (rt->rt6i_flags&RTF_DEFAULT)
1509 rtm->rtm_protocol = RTPROT_RA;
1511 if (rt->rt6i_flags&RTF_CACHE)
1512 rtm->rtm_flags |= RTM_F_CLONED;
1515 RTA_PUT(skb, RTA_DST, 16, dst);
1516 rtm->rtm_dst_len = 128;
1517 } else if (rtm->rtm_dst_len)
1518 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1519 #ifdef CONFIG_IPV6_SUBTREES
1521 RTA_PUT(skb, RTA_SRC, 16, src);
1522 rtm->rtm_src_len = 128;
1523 } else if (rtm->rtm_src_len)
1524 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1527 RTA_PUT(skb, RTA_IIF, 4, &iif);
1529 struct in6_addr saddr_buf;
1530 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1531 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1533 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1534 goto rtattr_failure;
1535 if (rt->u.dst.neighbour)
1536 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1538 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1539 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1540 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1541 if (rt->rt6i_expires)
1542 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1545 ci.rta_used = rt->u.dst.__use;
1546 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1547 ci.rta_error = rt->u.dst.error;
1551 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1552 nlh->nlmsg_len = skb->tail - b;
/* Overflow: undo the partially written message. */
1557 skb_trim(skb, b - skb->data);
1561 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1563 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1566 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1567 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1568 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1572 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1573 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1577 static int fib6_dump_node(struct fib6_walker_t *w)
1580 struct rt6_info *rt;
1582 for (rt = w->leaf; rt; rt = rt->u.next) {
1583 res = rt6_dump_route(rt, w->args);
1585 /* Frame is full, suspend walking */
1595 static void fib6_dump_end(struct netlink_callback *cb)
1597 struct fib6_walker_t *w = (void*)cb->args[0];
1601 fib6_walker_unlink(w);
1605 cb->done = (void*)cb->args[1];
1610 static int fib6_dump_done(struct netlink_callback *cb)
1613 return cb->done(cb);
1616 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1618 struct rt6_rtnl_dump_arg arg;
1619 struct fib6_walker_t *w;
1625 w = (void*)cb->args[0];
1629 * 1. hook callback destructor.
1631 cb->args[1] = (long)cb->done;
1632 cb->done = fib6_dump_done;
1635 * 2. allocate and initialize walker.
1637 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1640 RT6_TRACE("dump<%p", w);
1641 memset(w, 0, sizeof(*w));
1642 w->root = &ip6_routing_table;
1643 w->func = fib6_dump_node;
1645 cb->args[0] = (long)w;
1646 read_lock_bh(&rt6_lock);
1648 read_unlock_bh(&rt6_lock);
1651 read_lock_bh(&rt6_lock);
1652 res = fib6_walk_continue(w);
1653 read_unlock_bh(&rt6_lock);
1656 if (res <= 0 && skb->len == 0)
1657 RT6_TRACE("%p>dump end\n", w);
1659 res = res < 0 ? res : skb->len;
1660 /* res < 0 is an error. (really, impossible)
1661 res == 0 means that dump is complete, but skb still can contain data.
1662 res > 0 dump is not complete, but frame is full.
1664 /* Destroy walker, if dump of this table is complete. */
1670 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1672 struct rtattr **rta = arg;
1675 struct sk_buff *skb;
1677 struct rt6_info *rt;
1679 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1683 /* Reserve room for dummy headers, this skb can pass
1684 through good chunk of routing engine.
1686 skb->mac.raw = skb->data;
1687 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1689 memset(&fl, 0, sizeof(fl));
1691 ipv6_addr_copy(&fl.fl6_src,
1692 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1694 ipv6_addr_copy(&fl.fl6_dst,
1695 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1698 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1701 struct net_device *dev;
1702 dev = __dev_get_by_index(iif);
1711 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1713 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1715 skb->dst = &rt->u.dst;
1717 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1718 err = rt6_fill_node(skb, rt,
1719 &fl.fl6_dst, &fl.fl6_src,
1721 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1722 nlh->nlmsg_seq, nlh, 0);
1728 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1738 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1740 struct sk_buff *skb;
1741 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1743 skb = alloc_skb(size, gfp_any());
1745 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1748 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1750 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1753 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1754 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1761 #ifdef CONFIG_PROC_FS
1763 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
1774 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1776 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1779 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1784 if (arg->len >= arg->length)
1787 for (i=0; i<16; i++) {
1788 sprintf(arg->buffer + arg->len, "%02x",
1789 rt->rt6i_dst.addr.s6_addr[i]);
1792 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1795 #ifdef CONFIG_IPV6_SUBTREES
1796 for (i=0; i<16; i++) {
1797 sprintf(arg->buffer + arg->len, "%02x",
1798 rt->rt6i_src.addr.s6_addr[i]);
1801 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1804 sprintf(arg->buffer + arg->len,
1805 "00000000000000000000000000000000 00 ");
1809 if (rt->rt6i_nexthop) {
1810 for (i=0; i<16; i++) {
1811 sprintf(arg->buffer + arg->len, "%02x",
1812 rt->rt6i_nexthop->primary_key[i]);
1816 sprintf(arg->buffer + arg->len,
1817 "00000000000000000000000000000000");
1820 arg->len += sprintf(arg->buffer + arg->len,
1821 " %08x %08x %08x %08x %8s\n",
1822 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1823 rt->u.dst.__use, rt->rt6i_flags,
1824 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1828 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1830 struct rt6_proc_arg arg;
1831 arg.buffer = buffer;
1832 arg.offset = offset;
1833 arg.length = length;
1837 read_lock_bh(&rt6_lock);
1838 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1839 read_unlock_bh(&rt6_lock);
1843 *start += offset % RT6_INFO_LEN;
1845 arg.len -= offset % RT6_INFO_LEN;
1847 if (arg.len > length)
1855 extern struct rt6_statistics rt6_stats;
1857 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1859 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1860 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1861 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1862 rt6_stats.fib_rt_cache,
1863 atomic_read(&ip6_dst_ops.entries),
1864 rt6_stats.fib_discarded_routes);
1869 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1871 return single_open(file, rt6_stats_seq_show, NULL);
1874 static struct file_operations rt6_stats_seq_fops = {
1875 .owner = THIS_MODULE,
1876 .open = rt6_stats_seq_open,
1878 .llseek = seq_lseek,
1879 .release = single_release,
1881 #endif /* CONFIG_PROC_FS */
1883 #ifdef CONFIG_SYSCTL
1885 static int flush_delay;
1888 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1889 void __user *buffer, size_t *lenp)
1892 proc_dointvec(ctl, write, filp, buffer, lenp);
1893 if (flush_delay < 0)
1895 fib6_run_gc((unsigned long)flush_delay);
1901 ctl_table ipv6_route_table[] = {
1903 .ctl_name = NET_IPV6_ROUTE_FLUSH,
1904 .procname = "flush",
1905 .data = &flush_delay,
1906 .maxlen = sizeof(int),
1908 .proc_handler = &ipv6_sysctl_rtcache_flush
1911 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
1912 .procname = "gc_thresh",
1913 .data = &ip6_dst_ops.gc_thresh,
1914 .maxlen = sizeof(int),
1916 .proc_handler = &proc_dointvec,
1919 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
1920 .procname = "max_size",
1921 .data = &ip6_rt_max_size,
1922 .maxlen = sizeof(int),
1924 .proc_handler = &proc_dointvec,
1927 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
1928 .procname = "gc_min_interval",
1929 .data = &ip6_rt_gc_min_interval,
1930 .maxlen = sizeof(int),
1932 .proc_handler = &proc_dointvec_jiffies,
1933 .strategy = &sysctl_jiffies,
1936 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
1937 .procname = "gc_timeout",
1938 .data = &ip6_rt_gc_timeout,
1939 .maxlen = sizeof(int),
1941 .proc_handler = &proc_dointvec_jiffies,
1942 .strategy = &sysctl_jiffies,
1945 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
1946 .procname = "gc_interval",
1947 .data = &ip6_rt_gc_interval,
1948 .maxlen = sizeof(int),
1950 .proc_handler = &proc_dointvec_jiffies,
1951 .strategy = &sysctl_jiffies,
1954 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
1955 .procname = "gc_elasticity",
1956 .data = &ip6_rt_gc_elasticity,
1957 .maxlen = sizeof(int),
1959 .proc_handler = &proc_dointvec_jiffies,
1960 .strategy = &sysctl_jiffies,
1963 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
1964 .procname = "mtu_expires",
1965 .data = &ip6_rt_mtu_expires,
1966 .maxlen = sizeof(int),
1968 .proc_handler = &proc_dointvec_jiffies,
1969 .strategy = &sysctl_jiffies,
1972 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
1973 .procname = "min_adv_mss",
1974 .data = &ip6_rt_min_advmss,
1975 .maxlen = sizeof(int),
1977 .proc_handler = &proc_dointvec_jiffies,
1978 .strategy = &sysctl_jiffies,
1985 void __init ip6_route_init(void)
1987 struct proc_dir_entry *p;
1989 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
1990 sizeof(struct rt6_info),
1991 0, SLAB_HWCACHE_ALIGN,
1993 if (!ip6_dst_ops.kmem_cachep)
1994 panic("cannot create ip6_dst_cache");
1997 #ifdef CONFIG_PROC_FS
1998 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2000 p->owner = THIS_MODULE;
2002 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2009 void __exit ip6_route_cleanup(void)
2011 #ifdef CONFIG_PROC_FS
2012 proc_net_remove("ipv6_route");
2013 proc_net_remove("rt6_stats");
2020 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);