2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
53 #include <linux/rtnetlink.h>
57 #include <asm/uaccess.h>
60 #include <linux/sysctl.h>
63 /* Set to 3 to get tracing. */
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
71 #define RT6_TRACE(x...) do { ; } while (0)
75 static int ip6_rt_max_size = 4096;
76 static int ip6_rt_gc_min_interval = HZ / 2;
77 static int ip6_rt_gc_timeout = 60*HZ;
78 int ip6_rt_gc_interval = 30*HZ;
79 static int ip6_rt_gc_elasticity = 9;
80 static int ip6_rt_mtu_expires = 10*60*HZ;
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static int ip6_dst_gc(void);
88 static int ip6_pkt_discard(struct sk_buff *skb);
89 static void ip6_link_failure(struct sk_buff *skb);
90 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
92 static struct dst_ops ip6_dst_ops = {
94 .protocol = __constant_htons(ETH_P_IPV6),
97 .check = ip6_dst_check,
98 .negative_advice = ip6_negative_advice,
99 .link_failure = ip6_link_failure,
100 .update_pmtu = ip6_rt_update_pmtu,
101 .entry_size = sizeof(struct rt6_info),
104 struct rt6_info ip6_null_entry = {
107 .__refcnt = ATOMIC_INIT(1),
109 .dev = &loopback_dev,
111 .error = -ENETUNREACH,
112 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
113 .input = ip6_pkt_discard,
114 .output = ip6_pkt_discard,
116 .path = (struct dst_entry*)&ip6_null_entry,
119 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
120 .rt6i_metric = ~(u32) 0,
121 .rt6i_ref = ATOMIC_INIT(1),
124 struct fib6_node ip6_routing_table = {
125 .leaf = &ip6_null_entry,
126 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
129 /* Protects all the ip6 fib */
131 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
134 /* allocate dst with ip6_dst_ops */
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Thin wrapper: allocate a dst_entry bound to ip6_dst_ops so generic dst
 * code (gc, pmtu update, etc.) dispatches to the IPv6 handlers above. */
135 static __inline__ struct rt6_info *ip6_dst_alloc(void)
137 	return dst_alloc(&ip6_dst_ops);
141 * Route lookup. Any rt6_lock is implied.
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Walk the sibling route chain and pick the entry whose device matches the
 * requested output interface 'oif'.  The fragment shows a loopback check
 * (presumably the fallback when strict matching fails — TODO confirm) and
 * ip6_null_entry returned when nothing usable is found. */
144 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
148 	struct rt6_info *local = NULL;
149 	struct rt6_info *sprt;
152 	for (sprt = rt; sprt; sprt = sprt->u.next) {
153 		struct net_device *dev = sprt->rt6i_dev;
154 		if (dev->ifindex == oif)
156 		if (dev->flags&IFF_LOOPBACK)
/* No interface match (and no acceptable fallback): reject route. */
164 	return &ip6_null_entry;
170 * pointer to the last default router chosen. BH is disabled locally.
172 static struct rt6_info *rt6_dflt_pointer;
173 static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
175 /* Default Router Selection (RFC 2461 6.3.6) */
/* NOTE(review): sampled extract — interior lines missing; comments only.
 * Default Router Selection per RFC 2461 6.3.6: prefer a router whose
 * neighbour cache entry indicates (probable) reachability; otherwise
 * round-robin starting from the previously chosen rt6_dflt_pointer. */
176 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
178 	struct rt6_info *match = NULL;
179 	struct rt6_info *sprt;
/* Score each candidate router by its neighbour NUD state. */
182 	for (sprt = rt; sprt; sprt = sprt->u.next) {
183 		struct neighbour *neigh;
188 		    sprt->rt6i_dev->ifindex == oif))
191 		if (sprt == rt6_dflt_pointer)
194 		if ((neigh = sprt->rt6i_nexthop) != NULL) {
195 			read_lock_bh(&neigh->lock);
196 			switch (neigh->nud_state) {
214 			read_unlock_bh(&neigh->lock);
217 			read_unlock_bh(&neigh->lock);
/* 'm' is the per-candidate score; 'mpri' the best so far (declarations
 * not visible in this fragment — TODO confirm against full source). */
222 		if (m > mpri || m >= 12) {
226 			/* we choose the last default router if it
227 			 * is in (probably) reachable state.
228 			 * If route changed, we should do pmtu
229 			 * discovery. --yoshfuji
236 	spin_lock(&rt6_dflt_lock);
239 	 * No default routers are known to be reachable.
242 	if (rt6_dflt_pointer) {
243 		for (sprt = rt6_dflt_pointer->u.next;
244 		     sprt; sprt = sprt->u.next) {
245 			if (sprt->u.dst.obsolete <= 0 &&
246 			    sprt->u.dst.error == 0) {
253 		     sprt = sprt->u.next) {
254 			if (sprt->u.dst.obsolete <= 0 &&
255 			    sprt->u.dst.error == 0) {
259 			if (sprt == rt6_dflt_pointer)
266 	if (rt6_dflt_pointer != match)
267 		RT6_TRACE("changed default router: %p->%p\n",
268 			  rt6_dflt_pointer, match);
/* Remember the chosen router so subsequent calls keep using it while
 * it remains reachable (sticky selection). */
269 	rt6_dflt_pointer = match;
271 	spin_unlock(&rt6_dflt_lock);
275 	 * Last Resort: if no default routers found,
276 	 * use addrconf default route.
277 	 * We don't record this route.
279 	for (sprt = ip6_routing_table.leaf;
280 	     sprt; sprt = sprt->u.next) {
281 		if ((sprt->rt6i_flags & RTF_DEFAULT) &&
284 		     sprt->rt6i_dev->ifindex == oif))) {
290 	/* no default route. give up. */
291 	match = &ip6_null_entry;
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Look up a route for (daddr, saddr) restricted to interface 'oif'.
 * Takes rt6_lock for reading; returns a held route (dst_hold) on
 * success, and releases it when dst.error is set (fragment suggests the
 * error path ends in dst_release — TODO confirm full control flow). */
298 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
301 	struct fib6_node *fn;
304 	read_lock_bh(&rt6_lock);
305 	fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
306 	rt = rt6_device_match(fn->leaf, oif, strict);
307 	dst_hold(&rt->u.dst);
309 	read_unlock_bh(&rt6_lock);
311 	rt->u.dst.lastuse = jiffies;
312 	if (rt->u.dst.error == 0)
314 	dst_release(&rt->u.dst);
318 /* rt6_ins is called with FREE rt6_lock.
319 It takes new route entry, the addition fails by any reason the
320 route is freed. In any case, if caller does not hold it, it may
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Insert a new route into the fib tree under the rt6_lock write lock.
 * Per the comment fragment above this function in the original file, the
 * caller must NOT hold rt6_lock, and the entry is freed if insertion fails. */
324 static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
328 	write_lock_bh(&rt6_lock);
329 	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
330 	write_unlock_bh(&rt6_lock);
335 /* No rt6_lock! If COW failed, the function returns dead route entry
336 with dst->error set to errno value.
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Copy-on-write clone of route 'ort' into a /128 RTF_CACHE host route for
 * 'daddr' (and 'saddr' with subtrees).  On failure the fragment shows the
 * function falling back to a held ip6_null_entry with dst.error set. */
339 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
340 				struct in6_addr *saddr)
349 	rt = ip6_rt_copy(ort);
352 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
/* Non-gatewayed (on-link) clone: the destination itself is the nexthop. */
354 		if (!(rt->rt6i_flags&RTF_GATEWAY))
355 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
357 		rt->rt6i_dst.plen = 128;
358 		rt->rt6i_flags |= RTF_CACHE;
359 		rt->u.dst.flags |= DST_HOST;
361 #ifdef CONFIG_IPV6_SUBTREES
362 		if (rt->rt6i_src.plen && saddr) {
363 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
364 			rt->rt6i_src.plen = 128;
/* Bind the clone to its neighbour entry before inserting. */
368 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
370 		dst_hold(&rt->u.dst);
372 		err = rt6_ins(rt, NULL, NULL);
376 		rt->u.dst.error = err;
380 	dst_hold(&ip6_null_entry.u.dst);
381 	return &ip6_null_entry;
/* NOTE(review): sampled extract — interior macro lines missing; comments only.
 * BACKTRACK: when a strict lookup resolved to ip6_null_entry, climb fib tree
 * parents looking for a node carrying route info (RTN_RTINFO), giving up at
 * the root (RTN_ROOT).  Do not insert text between the continuation lines
 * below — they are a single macro definition. */
384 #define BACKTRACK() \
385 if (rt == &ip6_null_entry && strict) { \
386 	while ((fn = fn->parent) != NULL) { \
387 		if (fn->fn_flags & RTN_ROOT) { \
388 			dst_hold(&rt->u.dst); \
391 		if (fn->fn_flags & RTN_RTINFO) \
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Route an incoming packet: fib lookup on the packet's dst/src addresses,
 * strict interface matching for multicast/link-local destinations, COW of
 * non-cached routes, and attach the result to skb->dst. */
397 void ip6_route_input(struct sk_buff *skb)
399 	struct fib6_node *fn;
/* Strict matching required when the destination is multicast/link-local. */
404 	strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
407 	read_lock_bh(&rt6_lock);
409 	fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
410 			 &skb->nh.ipv6h->saddr);
415 	if ((rt->rt6i_flags & RTF_CACHE)) {
416 		rt = rt6_device_match(rt, skb->dev->ifindex, strict);
418 		dst_hold(&rt->u.dst);
422 	rt = rt6_device_match(rt, skb->dev->ifindex, 0);
/* No nexthop bound yet: drop the lock and clone via rt6_cow; retry on
 * -EEXIST races ('attempts' declared in a line not visible here). */
425 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
426 		read_unlock_bh(&rt6_lock);
428 		rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
429 			     &skb->nh.ipv6h->saddr);
431 		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
433 		/* Race condition! In the gap, when rt6_lock was
434 		   released someone could insert this route. Relookup.
436 		dst_release(&rt->u.dst);
439 	dst_hold(&rt->u.dst);
442 	read_unlock_bh(&rt6_lock);
444 	rt->u.dst.lastuse = jiffies;
446 	skb->dst = (struct dst_entry *) rt;
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Route an outgoing flow: like ip6_route_input but keyed on the flowi and
 * additionally running default-router selection for RTF_DEFAULT routes. */
449 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
451 	struct fib6_node *fn;
456 	strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
459 	read_lock_bh(&rt6_lock);
461 	fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
466 	if ((rt->rt6i_flags & RTF_CACHE)) {
467 		rt = rt6_device_match(rt, fl->oif, strict);
469 		dst_hold(&rt->u.dst);
/* Addrconf-priority default routes go through RFC 2461 router selection. */
472 	if (rt->rt6i_flags & RTF_DEFAULT) {
473 		if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
474 			rt = rt6_best_dflt(rt, fl->oif);
476 		rt = rt6_device_match(rt, fl->oif, strict);
/* Same COW-with-retry dance as ip6_route_input. */
480 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
481 		read_unlock_bh(&rt6_lock);
483 		rt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
485 		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
488 		/* Race condition! In the gap, when rt6_lock was
489 		   released someone could insert this route. Relookup.
491 		dst_release(&rt->u.dst);
494 	dst_hold(&rt->u.dst);
497 	read_unlock_bh(&rt6_lock);
499 	rt->u.dst.lastuse = jiffies;
506 * Destination cache support functions
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* dst_ops.check: a cached dst is still valid only while the fib node's
 * serial number matches the cookie captured at lookup time. */
509 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
513 	rt = (struct rt6_info *) dst;
515 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* dst_ops.negative_advice: a misbehaving cached clone is simply deleted
 * from the fib; non-cache routes are presumably left alone (tail missing). */
522 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
524 	struct rt6_info *rt = (struct rt6_info *) dst;
527 		if (rt->rt6i_flags & RTF_CACHE)
528 			ip6_del_rt(rt, NULL, NULL);
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* dst_ops.link_failure: report unreachability to the sender and age out
 * the route — expire a cache clone immediately, or invalidate the fib
 * node's serial number for a default route so lookups re-resolve. */
535 static void ip6_link_failure(struct sk_buff *skb)
539 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
541 	rt = (struct rt6_info *) skb->dst;
543 		if (rt->rt6i_flags&RTF_CACHE) {
544 			dst_set_expires(&rt->u.dst, 0);
545 			rt->rt6i_flags |= RTF_EXPIRES;
546 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
547 			rt->rt6i_node->fn_sernum = -1;
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* dst_ops.update_pmtu: lower the MTU metric on a /128 host route only
 * (never widen it), marking the route RTF_MODIFIED. */
551 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
553 	struct rt6_info *rt6 = (struct rt6_info*)dst;
555 	if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
556 		rt6->rt6i_flags |= RTF_MODIFIED;
557 		dst->metrics[RTAX_MTU-1] = mtu;
561 /* Protected by rt6_lock. */
562 static struct dst_entry *ndisc_dst_gc_list;
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Allocate a short-lived dst for neighbour-discovery packets.  The entry is
 * never inserted into the fib; it is chained onto ndisc_dst_gc_list (under
 * rt6_lock) and reclaimed by ndisc_dst_gc once its refcount drops. */
564 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
565 				  struct neighbour *neigh,
566 				  struct in6_addr *addr,
567 				  int (*output)(struct sk_buff *))
569 	struct rt6_info *rt = ip6_dst_alloc();
571 	if (unlikely(rt == NULL))
/* Caller may pass a neighbour; otherwise resolve one here. */
579 		neigh = ndisc_get_neigh(dev, addr);
582 	rt->rt6i_nexthop = neigh;
583 	rt->rt6i_expires = 0;
584 	rt->rt6i_flags = RTF_LOCAL;
586 	atomic_set(&rt->u.dst.__refcnt, 1);
587 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
588 	rt->u.dst.output = output;
590 	write_lock_bh(&rt6_lock);
591 	rt->u.dst.next = ndisc_dst_gc_list;
592 	ndisc_dst_gc_list = &rt->u.dst;
593 	write_unlock_bh(&rt6_lock);
/* Make sure the fib gc timer is running so these entries get reaped. */
595 	fib6_force_start_gc();
598 	return (struct dst_entry *)rt;
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Reap unreferenced entries from ndisc_dst_gc_list; 'more' presumably
 * reports entries still pending — TODO confirm against full source. */
601 int ndisc_dst_gc(int *more)
603 	struct dst_entry *dst, *next, **pprev;
607 	pprev = &ndisc_dst_gc_list;
609 	while ((dst = *pprev) != NULL) {
610 		if (!atomic_read(&dst->__refcnt)) {
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* dst_ops.gc: rate-limited garbage collection with an adaptive 'expire'
 * horizon — shrunk geometrically each pass (elasticity), reset when the
 * table is below the gc threshold.  Returns nonzero while still over
 * ip6_rt_max_size. */
623 static int ip6_dst_gc(void)
625 	static unsigned expire = 30*HZ;
626 	static unsigned long last_gc;
627 	unsigned long now = jiffies;
/* Skip gc entirely if we ran recently and the table is not overfull. */
629 	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
630 	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
636 	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
637 		expire = ip6_rt_gc_timeout>>1;
640 	expire -= expire>>ip6_rt_gc_elasticity;
641 	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
644 /* Clean host part of a prefix. Not necessary in radix tree,
645 but results in cleaner routing tables.
647 Remove it only when all the things will work!
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Return the device's IPv6 MTU (cnf.mtu6), defaulting to IPV6_MIN_MTU when
 * the device has no inet6_dev.  in6_dev_get takes a reference; the matching
 * put is in a line not visible here — TODO confirm. */
650 static int ipv6_get_mtu(struct net_device *dev)
652 	int mtu = IPV6_MIN_MTU;
653 	struct inet6_dev *idev;
655 	idev = in6_dev_get(dev);
657 		mtu = idev->cnf.mtu6;
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Derive the advertised TCP MSS from a path MTU: subtract IPv6+TCP headers,
 * clamp to the ip6_rt_min_advmss floor, and cap near IPV6_MAXPLEN per the
 * original comment below. */
663 static inline unsigned int ipv6_advmss(unsigned int mtu)
665 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
667 	if (mtu < ip6_rt_min_advmss)
668 		mtu = ip6_rt_min_advmss;
671 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
672 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
673 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
674 	 * rely only on pmtu discovery"
676 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Return the device's configured hop limit, falling back to the global
 * ipv6_devconf default when the device has no inet6_dev. */
681 static int ipv6_get_hoplimit(struct net_device *dev)
683 	int hoplimit = ipv6_devconf.hop_limit;
684 	struct inet6_dev *idev;
686 	idev = in6_dev_get(dev);
688 		hoplimit = idev->cnf.hop_limit;
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Add a route described by an in6_rtmsg (ioctl path) or netlink message.
 * Validates prefixes, allocates and fills an rt6_info, resolves/validates
 * the gateway, applies RTA_METRICS, fills metric defaults, then inserts
 * via rt6_ins.  Error paths free the entry with dst_free. */
698 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
704 	struct net_device *dev = NULL;
707 	rta = (struct rtattr **) _rtattr;
709 	if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
711 #ifndef CONFIG_IPV6_SUBTREES
712 	if (rtmsg->rtmsg_src_len)
715 	if (rtmsg->rtmsg_metric == 0)
716 		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
718 	rt = ip6_dst_alloc();
723 	rt->u.dst.obsolete = -1;
724 	rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
725 	if (nlh && (r = NLMSG_DATA(nlh))) {
726 		rt->rt6i_protocol = r->rtm_protocol;
728 		rt->rt6i_protocol = RTPROT_BOOT;
/* Choose input handler by destination type: multicast vs forwarding. */
731 	addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
733 	if (addr_type & IPV6_ADDR_MULTICAST)
734 		rt->u.dst.input = ip6_mc_input;
736 		rt->u.dst.input = ip6_forward;
738 	rt->u.dst.output = ip6_output;
740 	if (rtmsg->rtmsg_ifindex) {
741 		dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
747 	ipv6_addr_prefix(&rt->rt6i_dst.addr,
748 			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
749 	rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
750 	if (rt->rt6i_dst.plen == 128)
751 		rt->u.dst.flags = DST_HOST;
753 #ifdef CONFIG_IPV6_SUBTREES
754 	ipv6_addr_prefix(&rt->rt6i_src.addr,
755 			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
756 	rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
759 	rt->rt6i_metric = rtmsg->rtmsg_metric;
761 	/* We cannot add true routes via loopback here,
762 	   they would result in kernel looping; promote them to reject routes
764 	if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
765 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
770 		rt->u.dst.output = ip6_pkt_discard;
771 		rt->u.dst.input = ip6_pkt_discard;
772 		rt->u.dst.error = -ENETUNREACH;
773 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
/* Gateway validation: require a link-local unicast nexthop, or (for the
 * exceptions described in the original comment below) a resolvable
 * on-link gateway reached via a non-gateway route on the same device. */
777 	if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
778 		struct in6_addr *gw_addr;
781 		gw_addr = &rtmsg->rtmsg_gateway;
782 		ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
783 		gwa_type = ipv6_addr_type(gw_addr);
785 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
786 			struct rt6_info *grt;
788 			/* IPv6 strictly inhibits using not link-local
789 			   addresses as nexthop address.
790 			   Otherwise, router will not able to send redirects.
791 			   It is very good, but in some (rare!) circumstances
792 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
793 			   some exceptions. --ANK
796 			if (!(gwa_type&IPV6_ADDR_UNICAST))
799 			grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
805 			if (dev != grt->rt6i_dev) {
806 				dst_release(&grt->u.dst);
813 			if (!(grt->rt6i_flags&RTF_GATEWAY))
815 			dst_release(&grt->u.dst);
821 	if (dev == NULL || (dev->flags&IFF_LOOPBACK))
/* Bind the neighbour entry now for gatewayed / NONEXTHOP routes. */
829 	if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
830 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
831 		if (IS_ERR(rt->rt6i_nexthop)) {
832 			err = PTR_ERR(rt->rt6i_nexthop);
833 			rt->rt6i_nexthop = NULL;
838 	rt->rt6i_flags = rtmsg->rtmsg_flags;
/* Copy user-supplied RTA_METRICS attributes into dst.metrics. */
841 	if (rta && rta[RTA_METRICS-1]) {
842 		int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
843 		struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
845 		while (RTA_OK(attr, attrlen)) {
846 			unsigned flavor = attr->rta_type;
848 			if (flavor > RTAX_MAX) {
852 			rt->u.dst.metrics[flavor-1] =
853 				*(u32 *)RTA_DATA(attr);
855 			attr = RTA_NEXT(attr, attrlen);
/* Fill metric defaults the user did not provide. */
859 	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
860 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
861 			rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
862 				IPV6_DEFAULT_MCASTHOPS;
864 			rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
865 				ipv6_get_hoplimit(dev);
868 	if (!rt->u.dst.metrics[RTAX_MTU-1])
869 		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
870 	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
871 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
873 	return rt6_ins(rt, nlh, _rtattr);
878 	dst_free((struct dst_entry *) rt);
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Delete a route from the fib under the rt6_lock write lock.  Clears
 * rt6_dflt_pointer (the cached default-router choice may be this route)
 * and drops the caller's reference before fib6_del. */
882 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
886 	write_lock_bh(&rt6_lock);
888 	spin_lock_bh(&rt6_dflt_lock);
889 	rt6_dflt_pointer = NULL;
890 	spin_unlock_bh(&rt6_dflt_lock);
892 	dst_release(&rt->u.dst);
894 	err = fib6_del(rt, nlh, _rtattr);
895 	write_unlock_bh(&rt6_lock);
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Delete the route matching the in6_rtmsg: locate the fib node by dst/src
 * prefix, then scan its leaf chain filtering on ifindex, gateway, and
 * metric (each filter applies only when the request specifies it). */
900 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
902 	struct fib6_node *fn;
906 	read_lock_bh(&rt6_lock);
908 	fn = fib6_locate(&ip6_routing_table,
909 			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
910 			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
913 		for (rt = fn->leaf; rt; rt = rt->u.next) {
914 			if (rtmsg->rtmsg_ifindex &&
915 			    (rt->rt6i_dev == NULL ||
916 			     rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
918 			if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
919 			    ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
921 			if (rtmsg->rtmsg_metric &&
922 			    rtmsg->rtmsg_metric != rt->rt6i_metric)
/* Hold the route across the lock drop; ip6_del_rt takes the write lock. */
924 			dst_hold(&rt->u.dst);
925 			read_unlock_bh(&rt6_lock);
927 			return ip6_del_rt(rt, nlh, _rtattr);
930 	read_unlock_bh(&rt6_lock);
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Handle an ICMPv6 redirect for 'dest' from router 'saddr': validate the
 * redirect (device match, gateway route, source-is-nexthop per RFC 2461),
 * then install a /128 RTF_DYNAMIC|RTF_CACHE clone pointing at 'neigh'
 * and remove any superseded cache entry. */
938 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
939 		  struct neighbour *neigh, int on_link)
941 	struct rt6_info *rt, *nrt;
943 	/* Locate old route to this destination. */
944 	rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
949 	if (neigh->dev != rt->rt6i_dev)
952 	/* Redirect received -> path was valid.
953 	   Look, redirects are sent only in response to data packets,
954 	   so that this nexthop apparently is reachable. --ANK
956 	dst_confirm(&rt->u.dst);
958 	/* Duplicate redirect: silently ignore. */
959 	if (neigh == rt->u.dst.neighbour)
962 	/* Current route is on-link; redirect is always invalid.
964 	   Seems, previous statement is not true. It could
965 	   be node, which looks for us as on-link (f.e. proxy ndisc)
966 	   But then router serving it might decide, that we should
967 	   know truth 8)8) --ANK (980726).
969 	if (!(rt->rt6i_flags&RTF_GATEWAY))
973 	 * RFC 2461 specifies that redirects should only be
974 	 * accepted if they come from the nexthop to the target.
975 	 * Due to the way default routers are chosen, this notion
976 	 * is a bit fuzzy and one might need to check all default
980 	if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
981 		if (rt->rt6i_flags & RTF_DEFAULT) {
982 			struct rt6_info *rt1;
984 			read_lock(&rt6_lock);
985 			for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
986 				if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
987 					dst_hold(&rt1->u.dst);
988 					dst_release(&rt->u.dst);
989 					read_unlock(&rt6_lock);
994 			read_unlock(&rt6_lock);
997 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
998 			       "for redirect target\n");
1005 	 *	We have finally decided to accept it.
/* Build the redirected clone; on_link redirects drop RTF_GATEWAY. */
1008 	nrt = ip6_rt_copy(rt);
1012 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1014 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1016 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1017 	nrt->rt6i_dst.plen = 128;
1018 	nrt->u.dst.flags |= DST_HOST;
1020 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1021 	nrt->rt6i_nexthop = neigh_clone(neigh);
1022 	/* Reset pmtu, it may be better */
1023 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1024 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
1026 	if (rt6_ins(nrt, NULL, NULL))
1029 	if (rt->rt6i_flags&RTF_CACHE) {
1030 		ip6_del_rt(rt, NULL, NULL);
1035 	dst_release(&rt->u.dst);
1040 * Handle ICMP "packet too big" messages
1041 * i.e. Path MTU discovery
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Handle an ICMPv6 "Packet Too Big" for (daddr, saddr) on 'dev': clamp
 * pmtu to IPV6_MIN_MTU per RFC 1981, ignore non-decreases, then either
 * update an existing RTF_CACHE host route in place or create an expiring
 * host clone (via rt6_cow for connected routes, ip6_rt_copy otherwise)
 * carrying the new MTU. */
1044 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1045 			struct net_device *dev, u32 pmtu)
1047 	struct rt6_info *rt, *nrt;
1049 	if (pmtu < IPV6_MIN_MTU) {
1050 		if (net_ratelimit())
1051 			printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1053 		/* According to RFC1981, the PMTU is set to the IPv6 minimum
1054 		   link MTU if the node receives a Packet Too Big message
1055 		   reporting next-hop MTU that is less than the IPv6 minimum MTU.
1057 		pmtu = IPV6_MIN_MTU;
1060 	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1065 	if (pmtu >= dst_pmtu(&rt->u.dst))
1068 	/* New mtu received -> path was valid.
1069 	   They are sent only in response to data packets,
1070 	   so that this nexthop apparently is reachable. --ANK
1072 	dst_confirm(&rt->u.dst);
1074 	/* Host route. If it is static, it would be better
1075 	   not to override it, but add new one, so that
1076 	   when cache entry will expire old pmtu
1077 	   would return automatically.
1079 	if (rt->rt6i_flags & RTF_CACHE) {
1080 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1081 		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1082 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1087 	   Two cases are possible:
1088 	   1. It is connected route. Action: COW
1089 	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1091 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1092 		nrt = rt6_cow(rt, daddr, saddr);
1093 		if (!nrt->u.dst.error) {
1094 			nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1095 			/* According to RFC 1981, detecting PMTU increase shouldn't be
1096 			   happened within 5 mins, the recommended timer is 10 mins.
1097 			   Here this route expiration time is set to ip6_rt_mtu_expires
1098 			   which is 10 mins. After 10 mins the decreased pmtu is expired
1099 			   and detecting PMTU increase will be automatically happened.
1101 			dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1102 			nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1104 		dst_release(&nrt->u.dst);
1106 		nrt = ip6_rt_copy(rt);
1109 		ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1110 		nrt->rt6i_dst.plen = 128;
1111 		nrt->u.dst.flags |= DST_HOST;
1112 		nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1113 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1114 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1115 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1116 		rt6_ins(nrt, NULL, NULL);
1120 	dst_release(&rt->u.dst);
1124 * Misc support functions
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Shallow-copy route 'ort' into a fresh rt6_info: handlers, metrics,
 * device (with dev_hold), gateway and prefix keys.  The copy never
 * inherits RTF_EXPIRES and starts with metric 0. */
1127 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1129 	struct rt6_info *rt = ip6_dst_alloc();
1132 		rt->u.dst.input = ort->u.dst.input;
1133 		rt->u.dst.output = ort->u.dst.output;
1135 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1136 		rt->u.dst.dev = ort->u.dst.dev;
1138 			dev_hold(rt->u.dst.dev);
1139 		rt->u.dst.lastuse = jiffies;
1140 		rt->rt6i_expires = 0;
1142 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1143 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1144 		rt->rt6i_metric = 0;
1146 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1147 #ifdef CONFIG_IPV6_SUBTREES
1148 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Find the default-router route whose device and gateway address match;
 * returns it held (dst_hold).  Scans the root node's leaf chain under the
 * rt6_lock write lock. */
1154 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1156 	struct rt6_info *rt;
1157 	struct fib6_node *fn;
1159 	fn = &ip6_routing_table;
1161 	write_lock_bh(&rt6_lock);
1162 	for (rt = fn->leaf; rt; rt=rt->u.next) {
1163 		if (dev == rt->rt6i_dev &&
1164 		    ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1168 		dst_hold(&rt->u.dst);
1169 	write_unlock_bh(&rt6_lock);
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Install an addrconf default route (::/0) via 'gwaddr' on 'dev' with
 * metric 1024, then look it back up so the caller gets a held entry. */
1173 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1174 				     struct net_device *dev)
1176 	struct in6_rtmsg rtmsg;
1178 	memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1179 	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1180 	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1181 	rtmsg.rtmsg_metric = 1024;
1182 	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1184 	rtmsg.rtmsg_ifindex = dev->ifindex;
1186 	ip6_route_add(&rtmsg, NULL, NULL);
1187 	return rt6_get_dflt_router(gwaddr, dev);
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Remove default routes from the table: addrconf defaults normally, or
 * RTF_ALLONLINK routes in 'last_resort' mode.  Deletion requires dropping
 * the read lock per entry, so the visible loop restarts after each
 * ip6_del_rt (restart label is in a line not shown — TODO confirm). */
1190 void rt6_purge_dflt_routers(int last_resort)
1192 	struct rt6_info *rt;
1196 		flags = RTF_ALLONLINK;
1198 		flags = RTF_DEFAULT | RTF_ADDRCONF;
1201 	read_lock_bh(&rt6_lock);
1202 	for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1203 		if (rt->rt6i_flags & flags) {
1204 			dst_hold(&rt->u.dst);
/* Invalidate the cached default-router choice before deleting. */
1206 			spin_lock_bh(&rt6_dflt_lock);
1207 			rt6_dflt_pointer = NULL;
1208 			spin_unlock_bh(&rt6_dflt_lock);
1210 			read_unlock_bh(&rt6_lock);
1212 			ip6_del_rt(rt, NULL, NULL);
1217 	read_unlock_bh(&rt6_lock);
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* SIOCADDRT/SIOCDELRT ioctl entry: requires CAP_NET_ADMIN, copies the
 * in6_rtmsg from userspace, and dispatches to ip6_route_add/del. */
1220 int ipv6_route_ioctl(unsigned int cmd, void *arg)
1222 	struct in6_rtmsg rtmsg;
1226 	case SIOCADDRT:		/* Add a route */
1227 	case SIOCDELRT:		/* Delete a route */
1228 		if (!capable(CAP_NET_ADMIN))
1230 		err = copy_from_user(&rtmsg, arg,
1231 				     sizeof(struct in6_rtmsg));
1238 			err = ip6_route_add(&rtmsg, NULL, NULL);
1241 			err = ip6_route_del(&rtmsg, NULL, NULL);
1255 * Drop the packet on the floor
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Reject-route handler: count the no-route event, send "destination
 * unreachable / no route" back to the sender (packet is freed in lines
 * not visible here — TODO confirm). */
1258 int ip6_pkt_discard(struct sk_buff *skb)
1260 	IP6_INC_STATS(Ip6OutNoRoutes);
1261 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Install the local /128 route for an address assigned to 'dev': a host
 * route through loopback_dev delivering via ip6_input.  'anycast'
 * presumably suppresses RTF_LOCAL (the branch line is missing — TODO
 * confirm). */
1270 int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast)
1272 	struct rt6_info *rt = ip6_dst_alloc();
1277 	dev_hold(&loopback_dev);
1279 	rt->u.dst.flags = DST_HOST;
1280 	rt->u.dst.input = ip6_input;
1281 	rt->u.dst.output = ip6_output;
1282 	rt->rt6i_dev = &loopback_dev;
1283 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1284 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
1285 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
1286 	rt->u.dst.obsolete = -1;
1288 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1290 		rt->rt6i_flags |= RTF_LOCAL;
1291 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1292 	if (rt->rt6i_nexthop == NULL) {
1293 		dst_free((struct dst_entry *) rt);
1297 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1298 	rt->rt6i_dst.plen = 128;
1299 	rt6_ins(rt, NULL, NULL);
1304 /* Delete address. Warning: you should check that this address
1305 disappeared before calling this function.
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Remove the local /128 route for 'addr' (the loopback host route added by
 * ip6_rt_addr_add); only a full /128 match is deleted. */
1308 int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
1310 	struct rt6_info *rt;
1313 	rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
1315 		if (rt->rt6i_dst.plen == 128)
1316 			err = ip6_del_rt(rt, NULL, NULL);
1318 			dst_release(&rt->u.dst);
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* fib6_clean_tree callback: select routes on the given device (or all
 * routes when arg == NULL), never the null entry, for deletion. */
1324 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1326 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1327 	    rt != &ip6_null_entry) {
1328 		RT6_TRACE("deleted by ifdown %p\n", rt);
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Purge all routes using 'dev' when the interface goes down. */
1334 void rt6_ifdown(struct net_device *dev)
1336 	write_lock_bh(&rt6_lock);
1337 	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1338 	write_unlock_bh(&rt6_lock);
1341 struct rt6_mtu_change_arg
1343 struct net_device *dev;
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* fib6_clean_tree callback for an administrative device-MTU change: update
 * a route's RTAX_MTU (and ADVMSS) when it uses the changed device, is not
 * MTU-locked, and either shrinks below or previously equaled the old
 * device MTU — rationale in the original comments kept below. */
1347 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1349 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1350 	struct inet6_dev *idev;
1352 	/* In IPv6 pmtu discovery is not optional,
1353 	   so that RTAX_MTU lock cannot disable it.
1354 	   We still use this lock to block changes
1355 	   caused by addrconf/ndisc.
1358 	idev = __in6_dev_get(arg->dev);
1362 	/* For administrative MTU increase, there is no way to discover
1363 	   IPv6 PMTU increase, so PMTU increase should be updated here.
1364 	   Since RFC 1981 doesn't include administrative MTU increase
1365 	   update PMTU increase is a MUST. (i.e. jumbo frame)
1368 	   If new MTU is less than route PMTU, this new MTU will be the
1369 	   lowest MTU in the path, update the route PMTU to reflect PMTU
1370 	   decreases; if new MTU is greater than route PMTU, and the
1371 	   old MTU is the lowest MTU in the path, update the route PMTU
1372 	   to reflect the increase. In this case if the other nodes' MTU
1373 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
1376 	if (rt->rt6i_dev == arg->dev &&
1377 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1378 	    (dst_pmtu(&rt->u.dst) > arg->mtu ||
1379 	     (dst_pmtu(&rt->u.dst) < arg->mtu &&
1380 	      dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
1381 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1382 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Propagate a device MTU change across the whole routing table via
 * rt6_mtu_change_route (arg initialization lines not visible here). */
1386 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1388 	struct rt6_mtu_change_arg arg;
1392 	read_lock_bh(&rt6_lock);
1393 	fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1394 	read_unlock_bh(&rt6_lock);
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Translate a netlink rtmsg + attribute vector into the legacy in6_rtmsg
 * used by ip6_route_add/del, validating each attribute's length before
 * copying it. */
1397 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1398 			      struct in6_rtmsg *rtmsg)
1400 	memset(rtmsg, 0, sizeof(*rtmsg));
1402 	rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1403 	rtmsg->rtmsg_src_len = r->rtm_src_len;
1404 	rtmsg->rtmsg_flags = RTF_UP;
1405 	if (r->rtm_type == RTN_UNREACHABLE)
1406 		rtmsg->rtmsg_flags |= RTF_REJECT;
1408 	if (rta[RTA_GATEWAY-1]) {
1409 		if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1411 		memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1412 		rtmsg->rtmsg_flags |= RTF_GATEWAY;
/* Dst/src prefixes are copied at their prefix-length in bytes, rounded up. */
1414 	if (rta[RTA_DST-1]) {
1415 		if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1417 		memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1419 	if (rta[RTA_SRC-1]) {
1420 		if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1422 		memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1424 	if (rta[RTA_OIF-1]) {
1425 		if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1427 		memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1429 	if (rta[RTA_PRIORITY-1]) {
1430 		if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1432 		memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Netlink RTM_DELROUTE handler: convert and delegate to ip6_route_del. */
1437 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1439 	struct rtmsg *r = NLMSG_DATA(nlh);
1440 	struct in6_rtmsg rtmsg;
1442 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1444 	return ip6_route_del(&rtmsg, nlh, arg);
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Netlink RTM_NEWROUTE handler: convert and delegate to ip6_route_add. */
1447 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1449 	struct rtmsg *r = NLMSG_DATA(nlh);
1450 	struct in6_rtmsg rtmsg;
1452 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1454 	return ip6_route_add(&rtmsg, nlh, arg);
1457 struct rt6_rtnl_dump_arg
1459 struct sk_buff *skb;
1460 struct netlink_callback *cb;
/* NOTE(review): sampled extract — interior lines missing; comments only. */
/* Serialize one route into a netlink RTM message in 'skb': rtmsg header,
 * then RTA_DST/SRC/IIF/PREFSRC/metrics/GATEWAY/OIF/PRIORITY/CACHEINFO
 * attributes.  With 'prefix' set, non-RTF_PREFIX_RT routes are skipped.
 * On overflow the message is trimmed back to 'b' (nlmsg_failure /
 * rtattr_failure labels are in lines not visible here — TODO confirm). */
1463 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1464 			 struct in6_addr *dst,
1465 			 struct in6_addr *src,
1467 			 int type, u32 pid, u32 seq,
1468 			 struct nlmsghdr *in_nlh, int prefix)
1471 	struct nlmsghdr *nlh;
1472 	unsigned char *b = skb->tail;
1473 	struct rta_cacheinfo ci;
1475 	if (prefix) {	/* user wants prefix routes only */
1476 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1477 			/* success since this is not a prefix route */
1482 	if (!pid && in_nlh) {
1483 		pid = in_nlh->nlmsg_pid;
1486 	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1487 	rtm = NLMSG_DATA(nlh);
1488 	rtm->rtm_family = AF_INET6;
1489 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
1490 	rtm->rtm_src_len = rt->rt6i_src.plen;
1492 	rtm->rtm_table = RT_TABLE_MAIN;
/* Map route flags onto the netlink route type and protocol. */
1493 	if (rt->rt6i_flags&RTF_REJECT)
1494 		rtm->rtm_type = RTN_UNREACHABLE;
1495 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1496 		rtm->rtm_type = RTN_LOCAL;
1498 		rtm->rtm_type = RTN_UNICAST;
1500 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1501 	rtm->rtm_protocol = rt->rt6i_protocol;
1502 	if (rt->rt6i_flags&RTF_DYNAMIC)
1503 		rtm->rtm_protocol = RTPROT_REDIRECT;
1504 	else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1505 		rtm->rtm_protocol = RTPROT_KERNEL;
1506 	else if (rt->rt6i_flags&RTF_DEFAULT)
1507 		rtm->rtm_protocol = RTPROT_RA;
1509 	if (rt->rt6i_flags&RTF_CACHE)
1510 		rtm->rtm_flags |= RTM_F_CLONED;
/* Specific-destination queries report a /128; dumps report the prefix. */
1513 		RTA_PUT(skb, RTA_DST, 16, dst);
1514 		rtm->rtm_dst_len = 128;
1515 	} else if (rtm->rtm_dst_len)
1516 		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1517 #ifdef CONFIG_IPV6_SUBTREES
1519 		RTA_PUT(skb, RTA_SRC, 16, src);
1520 		rtm->rtm_src_len = 128;
1521 	} else if (rtm->rtm_src_len)
1522 		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1525 		RTA_PUT(skb, RTA_IIF, 4, &iif);
1527 		struct in6_addr saddr_buf;
1528 		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1529 			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1531 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1532 		goto rtattr_failure;
1533 	if (rt->u.dst.neighbour)
1534 		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1536 		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1537 	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1538 	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1539 	if (rt->rt6i_expires)
1540 		ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1543 	ci.rta_used = rt->u.dst.__use;
1544 	ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1545 	ci.rta_error = rt->u.dst.error;
1549 	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1550 	nlh->nlmsg_len = skb->tail - b;
/* Overflow: roll the skb back to the state before this message. */
1555 	skb_trim(skb, b - skb->data);
/*
 * Per-route callback used while dumping the fib6 tree over netlink:
 * encode one RTM_NEWROUTE message for @rt into the dump skb via
 * rt6_fill_node() and return its result (negative when the frame is
 * full, so the walk can be suspended and resumed later).
 * NOTE(review): lines are elided in this excerpt; 'prefix' below is
 * presumably a local declared on an off-screen line — verify.
 */
1559 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1561 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
/* If the request header is large enough to carry a struct rtmsg,
 * honour RTM_F_PREFIX so userspace can ask for prefix routes only. */
1564 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1565 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1566 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1570 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1571 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
/*
 * fib6 walker callback: dump every route chained off the current leaf
 * node.  Propagates rt6_dump_route()'s result so that a full netlink
 * frame suspends the tree walk (see in-line comment below).
 */
1575 static int fib6_dump_node(struct fib6_walker_t *w)
1578 struct rt6_info *rt;
/* All routes sharing this node are linked through rt->u.next. */
1580 for (rt = w->leaf; rt; rt = rt->u.next) {
1581 res = rt6_dump_route(rt, w->args);
1583 /* Frame is full, suspend walking */
/*
 * Tear down the dump state attached to a netlink callback: unlink the
 * walker (stored in cb->args[0]) from the walker list and restore the
 * original ->done hook that inet6_dump_fib() stashed in cb->args[1].
 */
1593 static void fib6_dump_end(struct netlink_callback *cb)
1595 struct fib6_walker_t *w = (void*)cb->args[0];
1599 fib6_walker_unlink(w);
1603 cb->done = (void*)cb->args[1];
/*
 * Replacement ->done handler installed by inet6_dump_fib().  Cleanup
 * is elided in this excerpt (presumably fib6_dump_end(cb), restoring
 * cb->done — verify); the caller's original done() is then invoked.
 */
1608 static int fib6_dump_done(struct netlink_callback *cb)
1611 return cb->done(cb);
/*
 * RTM_GETROUTE dump entry point: walk the IPv6 routing table and emit
 * one netlink message per route through fib6_dump_node().  Dumps are
 * resumable across multiple recvmsg() calls: the walker lives in
 * cb->args[0] and the caller's original ->done hook in cb->args[1].
 * NOTE(review): several lines (branch structure, error paths, the
 * kmalloc-failure check) are elided in this excerpt.
 */
1614 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1616 struct rt6_rtnl_dump_arg arg;
1617 struct fib6_walker_t *w;
/* Non-NULL args[0] means this call continues an earlier, suspended
 * dump; otherwise we set up fresh state below. */
1623 w = (void*)cb->args[0];
1627 * 1. hook callback destructor.
1629 cb->args[1] = (long)cb->done;
1630 cb->done = fib6_dump_done;
1633 * 2. allocate and initialize walker.
1635 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1638 RT6_TRACE("dump<%p", w);
1639 memset(w, 0, sizeof(*w));
1640 w->root = &ip6_routing_table;
1641 w->func = fib6_dump_node;
1643 cb->args[0] = (long)w;
/* The tree walk runs under the table read lock. */
1644 read_lock_bh(&rt6_lock);
1646 read_unlock_bh(&rt6_lock);
/* Continuation path: resume the previously suspended walker. */
1649 read_lock_bh(&rt6_lock);
1650 res = fib6_walk_continue(w);
1651 read_unlock_bh(&rt6_lock);
1654 if (res <= 0 && skb->len == 0)
1655 RT6_TRACE("%p>dump end\n", w);
1657 res = res < 0 ? res : skb->len;
1658 /* res < 0 is an error. (really, impossible)
1659 res == 0 means that dump is complete, but skb still can contain data.
1660 res > 0 dump is not complete, but frame is full.
1662 /* Destroy walker, if dump of this table is complete. */
/*
 * RTM_GETROUTE single-route query: build a dummy skb, resolve the
 * route for the flow described by the request's RTA_* attributes,
 * encode the result with rt6_fill_node() and unicast the answer back
 * to the requesting socket.
 * NOTE(review): attribute-presence checks and error paths are elided
 * in this excerpt; the RTA_DATA() dereferences below presumably sit
 * behind such checks — verify against the full source.
 */
1668 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1670 struct rtattr **rta = arg;
1673 struct sk_buff *skb;
1675 struct rt6_info *rt;
1677 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1681 /* Reserve room for dummy headers, this skb can pass
1682 through good chunk of routing engine.
1684 skb->mac.raw = skb->data;
1685 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
/* Build the flow key from the request attributes. */
1687 memset(&fl, 0, sizeof(fl));
1689 ipv6_addr_copy(&fl.fl6_src,
1690 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1692 ipv6_addr_copy(&fl.fl6_dst,
1693 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1696 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
/* Optional input-interface lookup (validation elided here). */
1699 struct net_device *dev;
1700 dev = __dev_get_by_index(iif);
1709 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1711 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
/* Hand the route reference to the skb so it is released with it. */
1713 skb->dst = &rt->u.dst;
1715 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1716 err = rt6_fill_node(skb, rt,
1717 &fl.fl6_dst, &fl.fl6_src,
1719 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1720 nlh->nlmsg_seq, nlh, 0);
/* Unicast the reply to the requester's netlink pid. */
1726 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
/*
 * Broadcast an RTM_NEWROUTE/RTM_DELROUTE (@event) notification for
 * @rt to the RTMGRP_IPV6_ROUTE multicast group.  If the skb cannot be
 * allocated or the route cannot be encoded, listeners are informed via
 * netlink_set_err() instead of silently dropping the event.
 */
1736 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1738 struct sk_buff *skb;
/* Room for the rtmsg header plus 256 bytes of attributes. */
1739 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
/* gfp_any(): presumably picks an atomic-safe allocation flag when
 * called from softirq context — verify gfp_any()'s definition. */
1741 skb = alloc_skb(size, gfp_any());
1743 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1746 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1748 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1751 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1752 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1759 #ifdef CONFIG_PROC_FS
1761 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/*
 * /proc/net/ipv6_route formatter, called per route by fib6_clean_tree:
 * append one fixed-width text line (RT6_INFO_LEN bytes) describing the
 * route to arg->buffer, honouring the offset/length window of the
 * procfs read.  NOTE(review): some lines (len updates inside the hex
 * loops, skip bookkeeping) are elided in this excerpt.
 */
1772 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1774 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
/* Skip whole entries that lie before the requested file offset. */
1777 if (arg->skip < arg->offset / RT6_INFO_LEN) {
/* Stop producing output once the caller's buffer window is full. */
1782 if (arg->len >= arg->length)
/* Destination prefix: 32 hex digits, then its length in hex. */
1785 for (i=0; i<16; i++) {
1786 sprintf(arg->buffer + arg->len, "%02x",
1787 rt->rt6i_dst.addr.s6_addr[i]);
1790 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
/* Source prefix: real value with subtrees compiled in, fixed
 * all-zero placeholder otherwise (keeps the line width constant). */
1793 #ifdef CONFIG_IPV6_SUBTREES
1794 for (i=0; i<16; i++) {
1795 sprintf(arg->buffer + arg->len, "%02x",
1796 rt->rt6i_src.addr.s6_addr[i]);
1799 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1802 sprintf(arg->buffer + arg->len,
1803 "00000000000000000000000000000000 00 ");
/* Next hop: neighbour primary key when known, zeros otherwise. */
1807 if (rt->rt6i_nexthop) {
1808 for (i=0; i<16; i++) {
1809 sprintf(arg->buffer + arg->len, "%02x",
1810 rt->rt6i_nexthop->primary_key[i]);
1814 sprintf(arg->buffer + arg->len,
1815 "00000000000000000000000000000000");
/* Trailing columns: metric, refcount, use count, flags, device. */
1818 arg->len += sprintf(arg->buffer + arg->len,
1819 " %08x %08x %08x %08x %8s\n",
1820 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1821 rt->u.dst.__use, rt->rt6i_flags,
1822 rt->rt6i_dev ? rt->rt6i_dev->name : "");
/*
 * procfs read handler for /proc/net/ipv6_route: walk the routing table
 * under the read lock, letting rt6_info_route() append one fixed-width
 * line per route, then adjust *start and the returned length for the
 * part of the offset that falls inside an entry.
 */
1826 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1828 struct rt6_proc_arg arg;
1829 arg.buffer = buffer;
1830 arg.offset = offset;
1831 arg.length = length;
1835 read_lock_bh(&rt6_lock);
1836 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1837 read_unlock_bh(&rt6_lock);
/* Entries are fixed-size (RT6_INFO_LEN); compensate for an offset
 * landing in the middle of one. */
1841 *start += offset % RT6_INFO_LEN;
1843 arg.len -= offset % RT6_INFO_LEN;
1845 if (arg.len > length)
1853 extern struct rt6_statistics rt6_stats;
/*
 * seq_file show handler for /proc/net/rt6_stats: print one line of
 * global fib6/route counters in hex.
 */
1855 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1857 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1858 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1859 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1860 rt6_stats.fib_rt_cache,
/* Live dst-entry count comes from the dst_ops counter, not
 * rt6_stats. */
1861 atomic_read(&ip6_dst_ops.entries),
1862 rt6_stats.fib_discarded_routes);
/* open() for /proc/net/rt6_stats: single-record seq_file around
 * rt6_stats_seq_show(). */
1867 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1869 return single_open(file, rt6_stats_seq_show, NULL);
/* File operations for /proc/net/rt6_stats.  The .read member is on an
 * elided line here — presumably seq_read, matching the seq_lseek /
 * single_release pairing below. */
1872 static struct file_operations rt6_stats_seq_fops = {
1873 .owner = THIS_MODULE,
1874 .open = rt6_stats_seq_open,
1876 .llseek = seq_lseek,
1877 .release = single_release,
1879 #endif /* CONFIG_PROC_FS */
1881 #ifdef CONFIG_SYSCTL
1883 static int flush_delay;
/*
 * sysctl handler for net.ipv6.route.flush: parse the written integer
 * into flush_delay via proc_dointvec(), reject negative values (error
 * path elided in this excerpt), then run the fib6 garbage collector
 * with that delay.
 */
1886 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1887 void *buffer, size_t *lenp)
1890 proc_dointvec(ctl, write, filp, buffer, lenp);
1891 if (flush_delay < 0)
1893 fib6_run_gc((unsigned long)flush_delay);
/*
 * net.ipv6.route.* sysctl table.  "flush" triggers an immediate GC run
 * through ipv6_sysctl_rtcache_flush(); the time-valued entries use the
 * jiffies proc handler/strategy pair so userspace reads and writes
 * them in seconds while the kernel stores jiffies.
 */
1899 ctl_table ipv6_route_table[] = {
1901 .ctl_name = NET_IPV6_ROUTE_FLUSH,
1902 .procname = "flush",
1903 .data = &flush_delay,
1904 .maxlen = sizeof(int),
1906 .proc_handler = &ipv6_sysctl_rtcache_flush
1909 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
1910 .procname = "gc_thresh",
1911 .data = &ip6_dst_ops.gc_thresh,
1912 .maxlen = sizeof(int),
1914 .proc_handler = &proc_dointvec,
1917 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
1918 .procname = "max_size",
1919 .data = &ip6_rt_max_size,
1920 .maxlen = sizeof(int),
1922 .proc_handler = &proc_dointvec,
1925 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
1926 .procname = "gc_min_interval",
1927 .data = &ip6_rt_gc_min_interval,
1928 .maxlen = sizeof(int),
1930 .proc_handler = &proc_dointvec_jiffies,
1931 .strategy = &sysctl_jiffies,
1934 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
1935 .procname = "gc_timeout",
1936 .data = &ip6_rt_gc_timeout,
1937 .maxlen = sizeof(int),
1939 .proc_handler = &proc_dointvec_jiffies,
1940 .strategy = &sysctl_jiffies,
1943 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
1944 .procname = "gc_interval",
1945 .data = &ip6_rt_gc_interval,
1946 .maxlen = sizeof(int),
1948 .proc_handler = &proc_dointvec_jiffies,
1949 .strategy = &sysctl_jiffies,
1952 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
1953 .procname = "gc_elasticity",
1954 .data = &ip6_rt_gc_elasticity,
1955 .maxlen = sizeof(int),
1957 .proc_handler = &proc_dointvec_jiffies,
1958 .strategy = &sysctl_jiffies,
1961 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
1962 .procname = "mtu_expires",
1963 .data = &ip6_rt_mtu_expires,
1964 .maxlen = sizeof(int),
1966 .proc_handler = &proc_dointvec_jiffies,
1967 .strategy = &sysctl_jiffies,
1970 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
1971 .procname = "min_adv_mss",
1972 .data = &ip6_rt_min_advmss,
1973 .maxlen = sizeof(int),
1975 .proc_handler = &proc_dointvec_jiffies,
1976 .strategy = &sysctl_jiffies,
/*
 * Boot-time initialization for IPv6 routing: create the rt6_info slab
 * cache (panic on failure — routing cannot operate without it) and,
 * with procfs configured, register the /proc/net/ipv6_route and
 * /proc/net/rt6_stats entries.
 */
1983 void __init ip6_route_init(void)
1985 struct proc_dir_entry *p;
1987 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
1988 sizeof(struct rt6_info),
1989 0, SLAB_HWCACHE_ALIGN,
1991 if (!ip6_dst_ops.kmem_cachep)
1992 panic("cannot create ip6_dst_cache");
1995 #ifdef CONFIG_PROC_FS
/* Old-style procfs read interface for the route listing... */
1996 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
1998 p->owner = THIS_MODULE,
2000 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
/*
 * Exit-time teardown: remove the /proc entries registered by
 * ip6_route_init() and destroy the rt6_info slab cache.  Further
 * teardown steps may exist past the end of this excerpt.
 */
2007 void __exit ip6_route_cleanup(void)
2009 #ifdef CONFIG_PROC_FS
2010 proc_net_remove("ipv6_route");
2011 proc_net_remove("rt6_stats");
2018 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);