2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
53 #include <linux/rtnetlink.h>
57 #include <asm/uaccess.h>
60 #include <linux/sysctl.h>
/* Debug tracing macros.  NOTE(review): the #if/#else/#endif guards that
 * select between the printk and the no-op definition of RT6_TRACE are
 * elided from this excerpt (two bodies of the same macro appear back
 * to back). */
63 /* Set to 3 to get tracing. */
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
71 #define RT6_TRACE(x...) do { ; } while (0)
/* Routing-cache garbage-collection / PMTU tunables.  Time values are in
 * jiffies; ip6_rt_min_advmss subtracts TCP (20) and IPv6 (40) header
 * sizes from the minimum MTU. */
75 static int ip6_rt_max_size = 4096;
76 static int ip6_rt_gc_min_interval = HZ / 2;
77 static int ip6_rt_gc_timeout = 60*HZ;
78 int ip6_rt_gc_interval = 30*HZ;
79 static int ip6_rt_gc_elasticity = 9;
80 static int ip6_rt_mtu_expires = 10*60*HZ;
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
/* Forward declarations for the dst_ops callbacks and helpers defined
 * later in this file. */
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void ip6_dst_destroy(struct dst_entry *);
87 static void ip6_dst_ifdown(struct dst_entry *, int how);
88 static int ip6_dst_gc(void);
90 static int ip6_pkt_discard(struct sk_buff *skb);
91 static int ip6_pkt_discard_out(struct sk_buff **pskb);
92 static void ip6_link_failure(struct sk_buff *skb);
93 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
/* dst_ops vtable wiring the IPv6 routing cache into the generic dst
 * layer.  NOTE(review): some members (e.g. .family, .gc) sit on lines
 * elided from this excerpt. */
95 static struct dst_ops ip6_dst_ops = {
97 .protocol = __constant_htons(ETH_P_IPV6),
100 .check = ip6_dst_check,
101 .destroy = ip6_dst_destroy,
102 .ifdown = ip6_dst_ifdown,
103 .negative_advice = ip6_negative_advice,
104 .link_failure = ip6_link_failure,
105 .update_pmtu = ip6_rt_update_pmtu,
106 .entry_size = sizeof(struct rt6_info),
/* Sentinel "no route" entry: a permanently-referenced reject route that
 * returns -ENETUNREACH and discards packets.  Lookups hand it back
 * (held) instead of NULL so callers can always dereference the result. */
109 struct rt6_info ip6_null_entry = {
112 .__refcnt = ATOMIC_INIT(1),
114 .dev = &loopback_dev,
116 .error = -ENETUNREACH,
117 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
118 .input = ip6_pkt_discard,
119 .output = ip6_pkt_discard_out,
121 .path = (struct dst_entry*)&ip6_null_entry,
124 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
/* worst possible metric so any real route wins a comparison */
125 .rt6i_metric = ~(u32) 0,
126 .rt6i_ref = ATOMIC_INIT(1),
/* Root node of the IPv6 FIB tree; its leaf starts as the null entry so
 * an empty table still resolves to a (reject) route. */
129 struct fib6_node ip6_routing_table = {
130 .leaf = &ip6_null_entry,
131 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
134 /* Protects all the ip6 fib */
136 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
139 /* allocate dst with ip6_dst_ops */
140 static __inline__ struct rt6_info *ip6_dst_alloc(void)
142 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
/* dst_ops.destroy hook: detach the route's inet6_dev reference.
 * NOTE(review): the idev NULL-check and the in6_dev_put() that
 * presumably releases the reference are on lines elided from this
 * excerpt — confirm against the full source. */
145 static void ip6_dst_destroy(struct dst_entry *dst)
147 struct rt6_info *rt = (struct rt6_info *)dst;
148 struct inet6_dev *idev = rt->rt6i_idev;
151 rt->rt6i_idev = NULL;
/* dst_ops.ifdown hook: the underlying device is going away, so release
 * the per-device state exactly as on final destruction. */
static void ip6_dst_ifdown(struct dst_entry *dst, int how)
{
	ip6_dst_destroy(dst);
}
162 * Route lookup. Any rt6_lock is implied.
/* Scan the leaf chain for a route whose device matches @oif; a loopback
 * device is remembered separately (presumably as a fallback when no
 * exact match exists).  NOTE(review): the selection/return logic between
 * the loop and the final statement is elided from this excerpt; the
 * visible fallback is the held-elsewhere null entry. */
165 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
169 struct rt6_info *local = NULL;
170 struct rt6_info *sprt;
173 for (sprt = rt; sprt; sprt = sprt->u.next) {
174 struct net_device *dev = sprt->rt6i_dev;
175 if (dev->ifindex == oif)
177 if (dev->flags&IFF_LOOPBACK)
185 return &ip6_null_entry;
/* Cache of the most recently selected default router, used by
 * rt6_best_dflt() to keep routing sticky while the router stays
 * reachable.  Guarded by rt6_dflt_lock. */
191 * pointer to the last default router chosen. BH is disabled locally.
193 static struct rt6_info *rt6_dflt_pointer;
194 static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
196 /* Default Router Selection (RFC 2461 6.3.6) */
/* Rank candidate default routers: respect the outgoing interface,
 * prefer neighbours in (probably) reachable NUD states, and stick with
 * the previously chosen router while it remains usable; otherwise
 * round-robin through the list.  NOTE(review): the NUD-state scoring
 * switch and parts of the round-robin bookkeeping are elided from this
 * excerpt — the 'm'/'mpri' score variables are declared on elided
 * lines. */
197 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
199 struct rt6_info *match = NULL;
200 struct rt6_info *sprt;
203 for (sprt = rt; sprt; sprt = sprt->u.next) {
204 struct neighbour *neigh;
209 sprt->rt6i_dev->ifindex == oif))
212 if (sprt == rt6_dflt_pointer)
215 if ((neigh = sprt->rt6i_nexthop) != NULL) {
216 read_lock_bh(&neigh->lock);
217 switch (neigh->nud_state) {
235 read_unlock_bh(&neigh->lock);
238 read_unlock_bh(&neigh->lock);
243 if (m > mpri || m >= 12) {
247 /* we choose the last default router if it
248 * is in (probably) reachable state.
249 * If route changed, we should do pmtu
250 * discovery. --yoshfuji
257 spin_lock(&rt6_dflt_lock);
260 * No default routers are known to be reachable.
/* round-robin: resume scanning after the last chosen router */
263 if (rt6_dflt_pointer) {
264 for (sprt = rt6_dflt_pointer->u.next;
265 sprt; sprt = sprt->u.next) {
266 if (sprt->u.dst.obsolete <= 0 &&
267 sprt->u.dst.error == 0) {
274 sprt = sprt->u.next) {
275 if (sprt->u.dst.obsolete <= 0 &&
276 sprt->u.dst.error == 0) {
280 if (sprt == rt6_dflt_pointer)
287 if (rt6_dflt_pointer != match)
288 RT6_TRACE("changed default router: %p->%p\n",
289 rt6_dflt_pointer, match);
290 rt6_dflt_pointer = match;
292 spin_unlock(&rt6_dflt_lock);
296 * Last Resort: if no default routers found,
297 * use addrconf default route.
298 * We don't record this route.
300 for (sprt = ip6_routing_table.leaf;
301 sprt; sprt = sprt->u.next) {
302 if ((sprt->rt6i_flags & RTF_DEFAULT) &&
305 sprt->rt6i_dev->ifindex == oif))) {
311 /* no default route. give up. */
312 match = &ip6_null_entry;
/* Public lookup entry point: resolve (daddr, saddr, oif) through the
 * FIB under the read lock and return the matching route with a
 * reference held.  NOTE(review): the error/NULL fallthrough between the
 * dst.error test and the dst_release() is elided from this excerpt. */
319 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
322 struct fib6_node *fn;
325 read_lock_bh(&rt6_lock);
326 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
327 rt = rt6_device_match(fn->leaf, oif, strict);
/* take the reference before dropping rt6_lock */
328 dst_hold(&rt->u.dst);
330 read_unlock_bh(&rt6_lock);
332 rt->u.dst.lastuse = jiffies;
333 if (rt->u.dst.error == 0)
335 dst_release(&rt->u.dst);
339 /* rt6_ins is called with FREE rt6_lock.
340 It takes new route entry, the addition fails by any reason the
341 route is freed. In any case, if caller does not hold it, it may
345 static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
349 write_lock_bh(&rt6_lock);
350 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
351 write_unlock_bh(&rt6_lock);
356 /* No rt6_lock! If COW failed, the function returns dead route entry
357 with dst->error set to errno value.
/* Clone @ort into a host (/128) RTF_CACHE entry for @daddr and insert
 * it into the FIB.  Callers can always dereference the result: on
 * failure either a dead entry with dst.error set, or the (held) null
 * entry, is returned.  NOTE(review): the allocation NULL-check and
 * parts of the error path are elided from this excerpt. */
360 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
361 struct in6_addr *saddr)
370 rt = ip6_rt_copy(ort);
373 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
/* for an on-link (non-gateway) route the nexthop is the destination */
375 if (!(rt->rt6i_flags&RTF_GATEWAY))
376 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
378 rt->rt6i_dst.plen = 128;
379 rt->rt6i_flags |= RTF_CACHE;
380 rt->u.dst.flags |= DST_HOST;
382 #ifdef CONFIG_IPV6_SUBTREES
383 if (rt->rt6i_src.plen && saddr) {
384 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
385 rt->rt6i_src.plen = 128;
389 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
391 dst_hold(&rt->u.dst);
393 err = rt6_ins(rt, NULL, NULL);
397 rt->u.dst.error = err;
401 dst_hold(&ip6_null_entry.u.dst);
402 return &ip6_null_entry;
/* After a strict lookup resolved to the null entry, walk back up the
 * tree looking for an ancestor node that carries route info and retry
 * from there.  NOTE(review): the macro's interior (goto targets / loop
 * exits) is partially elided from this excerpt. */
405 #define BACKTRACK() \
406 if (rt == &ip6_null_entry && strict) { \
407 while ((fn = fn->parent) != NULL) { \
408 if (fn->fn_flags & RTN_ROOT) { \
409 dst_hold(&rt->u.dst); \
412 if (fn->fn_flags & RTN_RTINFO) \
/* Input-path route resolution: look up the packet's addresses, COW a
 * cache clone when the route has no nexthop yet, and attach the result
 * to skb->dst.  Strict matching is used for multicast/link-local
 * destinations.  NOTE(review): the relookup/backtrack labels and loop
 * structure around the COW retry are elided from this excerpt. */
418 void ip6_route_input(struct sk_buff *skb)
420 struct fib6_node *fn;
425 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
428 read_lock_bh(&rt6_lock);
430 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
431 &skb->nh.ipv6h->saddr);
436 if ((rt->rt6i_flags & RTF_CACHE)) {
437 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
439 dst_hold(&rt->u.dst);
443 rt = rt6_device_match(rt, skb->dev->ifindex, 0);
446 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
/* must drop rt6_lock: rt6_cow takes it for the insert */
447 read_unlock_bh(&rt6_lock);
449 rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
450 &skb->nh.ipv6h->saddr);
452 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
454 /* Race condition! In the gap, when rt6_lock was
455 released someone could insert this route. Relookup.
457 dst_release(&rt->u.dst);
460 dst_hold(&rt->u.dst);
463 read_unlock_bh(&rt6_lock);
465 rt->u.dst.lastuse = jiffies;
467 skb->dst = (struct dst_entry *) rt;
/* Output-path route resolution for locally generated traffic.  Mirrors
 * ip6_route_input() but keys on the flow and additionally runs default
 * router selection for RTF_DEFAULT routes.  NOTE(review): labels, the
 * COW retry structure and the final return are elided from this
 * excerpt. */
470 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
472 struct fib6_node *fn;
477 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
480 read_lock_bh(&rt6_lock);
482 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
487 if ((rt->rt6i_flags & RTF_CACHE)) {
488 rt = rt6_device_match(rt, fl->oif, strict);
490 dst_hold(&rt->u.dst);
493 if (rt->rt6i_flags & RTF_DEFAULT) {
/* only RA/addrconf-priority defaults go through best-router selection */
494 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
495 rt = rt6_best_dflt(rt, fl->oif);
497 rt = rt6_device_match(rt, fl->oif, strict);
501 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
502 read_unlock_bh(&rt6_lock);
504 rt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
506 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
509 /* Race condition! In the gap, when rt6_lock was
510 released someone could insert this route. Relookup.
512 dst_release(&rt->u.dst);
515 dst_hold(&rt->u.dst);
518 read_unlock_bh(&rt6_lock);
520 rt->u.dst.lastuse = jiffies;
527 * Destination cache support functions
/* dst_ops.check: a cached route remains valid while its fib6_node's
 * serial number still equals the cookie taken at lookup time (the tree
 * bumps sernum on changes).  NOTE(review): the return statements are on
 * elided lines. */
530 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
534 rt = (struct rt6_info *) dst;
536 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/* dst_ops.negative_advice: a cached clone that stopped working is
 * removed from the tree; handling of non-cache routes and the return
 * value are on elided lines. */
543 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
545 struct rt6_info *rt = (struct rt6_info *) dst;
548 if (rt->rt6i_flags & RTF_CACHE)
549 ip6_del_rt(rt, NULL, NULL);
/* dst_ops.link_failure: tell the sender the address is unreachable,
 * then expire the offending cache entry immediately, or — for a default
 * route — invalidate its node's serial number so cached lookups are
 * rechecked. */
556 static void ip6_link_failure(struct sk_buff *skb)
560 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
562 rt = (struct rt6_info *) skb->dst;
564 if (rt->rt6i_flags&RTF_CACHE) {
/* expire "now" */
565 dst_set_expires(&rt->u.dst, 0);
566 rt->rt6i_flags |= RTF_EXPIRES;
567 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
568 rt->rt6i_node->fn_sernum = -1;
/* dst_ops.update_pmtu: lower the cached MTU of a host (/128) route.
 * NOTE(review): the statement clamping mtu to IPV6_MIN_MTU under the
 * visible if() is on an elided line. */
572 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
574 struct rt6_info *rt6 = (struct rt6_info*)dst;
576 if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
577 rt6->rt6i_flags |= RTF_MODIFIED;
578 if (mtu < IPV6_MIN_MTU)
580 dst->metrics[RTAX_MTU-1] = mtu;
584 /* Protected by rt6_lock. */
/* Singly-linked list of standalone dsts handed out by ndisc_dst_alloc()
 * and reclaimed by ndisc_dst_gc(). */
585 static struct dst_entry *ndisc_dst_gc_list;
/* Helpers defined further down in this file. */
586 static int ipv6_get_mtu(struct net_device *dev);
587 static inline unsigned int ipv6_advmss(unsigned int mtu);
/* Allocate a standalone dst for neighbour-discovery traffic.  The entry
 * is NOT inserted into the FIB; instead it is chained on
 * ndisc_dst_gc_list so ndisc_dst_gc() can reclaim it once the refcount
 * drops.  NOTE(review): the NULL-neigh branch guard and an error return
 * are on elided lines. */
589 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
590 struct neighbour *neigh,
591 struct in6_addr *addr,
592 int (*output)(struct sk_buff **))
594 struct rt6_info *rt = ip6_dst_alloc();
596 if (unlikely(rt == NULL))
603 neigh = ndisc_get_neigh(dev, addr);
606 rt->rt6i_idev = in6_dev_get(dev);
607 rt->rt6i_nexthop = neigh;
/* caller owns the initial reference */
608 atomic_set(&rt->u.dst.__refcnt, 1);
609 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
610 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
611 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
612 rt->u.dst.output = output;
614 #if 0 /* there's no chance to use these for ndisc */
615 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
618 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
619 rt->rt6i_dst.plen = 128;
/* chain onto the gc list under the FIB write lock */
622 write_lock_bh(&rt6_lock);
623 rt->u.dst.next = ndisc_dst_gc_list;
624 ndisc_dst_gc_list = &rt->u.dst;
625 write_unlock_bh(&rt6_lock);
627 fib6_force_start_gc();
630 return (struct dst_entry *)rt;
/* Reclaim entries on ndisc_dst_gc_list whose refcount reached zero.
 * NOTE(review): the unlink/free body, the *more accounting and the
 * return value are on lines elided from this excerpt. */
633 int ndisc_dst_gc(int *more)
635 struct dst_entry *dst, *next, **pprev;
639 pprev = &ndisc_dst_gc_list;
641 while ((dst = *pprev) != NULL) {
642 if (!atomic_read(&dst->__refcnt)) {
/* dst_ops.gc callback: rate-limited shrink of the routing cache with an
 * adaptive 'expire' horizon — halved toward ip6_rt_gc_timeout/2 when
 * the cache is small, decayed geometrically otherwise.  NOTE(review):
 * the fib6_run_gc() invocation and last_gc update are on elided lines. */
655 static int ip6_dst_gc(void)
657 static unsigned expire = 30*HZ;
658 static unsigned long last_gc;
659 unsigned long now = jiffies;
/* skip gc entirely if we ran recently and are under the size cap */
661 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
662 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
668 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
669 expire = ip6_rt_gc_timeout>>1;
672 expire -= expire>>ip6_rt_gc_elasticity;
673 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
676 /* Clean host part of a prefix. Not necessary in radix tree,
677 but results in cleaner routing tables.
679 Remove it only when all the things will work!
/* Return the device's configured IPv6 MTU (cnf.mtu6), falling back to
 * IPV6_MIN_MTU when the device has no inet6_dev.  NOTE(review): the
 * idev NULL-check, the in6_dev_put() release and the return are on
 * elided lines. */
682 static int ipv6_get_mtu(struct net_device *dev)
684 int mtu = IPV6_MIN_MTU;
685 struct inet6_dev *idev;
687 idev = in6_dev_get(dev);
689 mtu = idev->cnf.mtu6;
/* Derive the advertised TCP MSS from a path MTU: subtract the fixed
 * IPv6+TCP header overhead, clamp below by ip6_rt_min_advmss and above
 * by the largest non-jumbo payload.  NOTE(review): the capping
 * assignment and return are on elided lines. */
695 static inline unsigned int ipv6_advmss(unsigned int mtu)
697 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
699 if (mtu < ip6_rt_min_advmss)
700 mtu = ip6_rt_min_advmss;
703 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
704 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
705 * IPV6_MAXPLEN is also valid and means: "any MSS,
706 * rely only on pmtu discovery"
708 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/* Per-device hop limit, falling back to the global ipv6_devconf value.
 * NOTE(review): the idev NULL-check, in6_dev_put() and return are on
 * elided lines. */
713 static int ipv6_get_hoplimit(struct net_device *dev)
715 int hoplimit = ipv6_devconf.hop_limit;
716 struct inet6_dev *idev;
718 idev = in6_dev_get(dev);
720 hoplimit = idev->cnf.hop_limit;
/* Core route-insertion path shared by the ioctl and rtnetlink front
 * ends: validate the in6_rtmsg, allocate and fill an rt6_info, resolve
 * and sanity-check a gateway nexthop, apply RTA_METRICS attributes and
 * fill metric defaults, then insert via rt6_ins().  NOTE(review): many
 * error-path gotos, some local declarations and the 'out:' epilogue are
 * elided from this excerpt. */
730 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
736 struct net_device *dev = NULL;
739 rta = (struct rtattr **) _rtattr;
741 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
743 #ifndef CONFIG_IPV6_SUBTREES
/* source-routed entries need subtree support */
744 if (rtmsg->rtmsg_src_len)
747 if (rtmsg->rtmsg_ifindex) {
748 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
753 if (rtmsg->rtmsg_metric == 0)
754 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
756 rt = ip6_dst_alloc();
761 rt->u.dst.obsolete = -1;
762 rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
763 if (nlh && (r = NLMSG_DATA(nlh))) {
764 rt->rt6i_protocol = r->rtm_protocol;
766 rt->rt6i_protocol = RTPROT_BOOT;
769 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
771 if (addr_type & IPV6_ADDR_MULTICAST)
772 rt->u.dst.input = ip6_mc_input;
774 rt->u.dst.input = ip6_forward;
776 rt->u.dst.output = ip6_output;
778 ipv6_addr_prefix(&rt->rt6i_dst.addr,
779 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
780 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
781 if (rt->rt6i_dst.plen == 128)
782 rt->u.dst.flags = DST_HOST;
784 #ifdef CONFIG_IPV6_SUBTREES
785 ipv6_addr_prefix(&rt->rt6i_src.addr,
786 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
787 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
790 rt->rt6i_metric = rtmsg->rtmsg_metric;
792 /* We cannot add true routes via loopback here,
793 they would result in kernel looping; promote them to reject routes
795 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
796 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
801 rt->u.dst.output = ip6_pkt_discard_out;
802 rt->u.dst.input = ip6_pkt_discard;
803 rt->u.dst.error = -ENETUNREACH;
804 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
808 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
809 struct in6_addr *gw_addr;
812 gw_addr = &rtmsg->rtmsg_gateway;
813 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
814 gwa_type = ipv6_addr_type(gw_addr);
816 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
817 struct rt6_info *grt;
819 /* IPv6 strictly inhibits using not link-local
820 addresses as nexthop address.
821 Otherwise, router will not able to send redirects.
822 It is very good, but in some (rare!) circumstances
823 (SIT, PtP, NBMA NOARP links) it is handy to allow
824 some exceptions. --ANK
827 if (!(gwa_type&IPV6_ADDR_UNICAST))
/* the gateway itself must be reachable via an on-link route */
830 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
836 if (dev != grt->rt6i_dev) {
837 dst_release(&grt->u.dst);
844 if (!(grt->rt6i_flags&RTF_GATEWAY))
846 dst_release(&grt->u.dst);
852 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
860 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
861 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
862 if (IS_ERR(rt->rt6i_nexthop)) {
863 err = PTR_ERR(rt->rt6i_nexthop);
864 rt->rt6i_nexthop = NULL;
869 rt->rt6i_flags = rtmsg->rtmsg_flags;
/* copy user-supplied metrics (RTAX_*) into the dst */
872 if (rta && rta[RTA_METRICS-1]) {
873 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
874 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
876 while (RTA_OK(attr, attrlen)) {
877 unsigned flavor = attr->rta_type;
879 if (flavor > RTAX_MAX) {
883 rt->u.dst.metrics[flavor-1] =
884 *(u32 *)RTA_DATA(attr);
886 attr = RTA_NEXT(attr, attrlen);
/* fill in defaults for any metric the user left unset */
890 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
891 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
892 rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
893 IPV6_DEFAULT_MCASTHOPS;
895 rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
896 ipv6_get_hoplimit(dev);
899 if (!rt->u.dst.metrics[RTAX_MTU-1])
900 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
901 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
902 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
904 rt->rt6i_idev = in6_dev_get(dev);
905 return rt6_ins(rt, nlh, _rtattr);
/* error path: free the partially constructed route */
910 dst_free((struct dst_entry *) rt);
/* Remove @rt from the FIB under the write lock.  The cached default
 * router pointer is cleared first so selection restarts cleanly, and
 * the caller's reference is dropped before fib6_del(). */
914 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
918 write_lock_bh(&rt6_lock);
920 spin_lock_bh(&rt6_dflt_lock);
921 rt6_dflt_pointer = NULL;
922 spin_unlock_bh(&rt6_dflt_lock);
924 dst_release(&rt->u.dst);
926 err = fib6_del(rt, nlh, _rtattr);
927 write_unlock_bh(&rt6_lock);
/* Delete the route matching the rtmsg selectors (prefix, optional
 * ifindex, gateway and metric).  A match is handed to ip6_del_rt()
 * after taking a reference and dropping the read lock; the 'continue'
 * statements of the filter tests and the -ESRCH return are on elided
 * lines. */
932 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
934 struct fib6_node *fn;
938 read_lock_bh(&rt6_lock);
940 fn = fib6_locate(&ip6_routing_table,
941 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
942 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
945 for (rt = fn->leaf; rt; rt = rt->u.next) {
946 if (rtmsg->rtmsg_ifindex &&
947 (rt->rt6i_dev == NULL ||
948 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
950 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
951 ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
953 if (rtmsg->rtmsg_metric &&
954 rtmsg->rtmsg_metric != rt->rt6i_metric)
956 dst_hold(&rt->u.dst);
957 read_unlock_bh(&rt6_lock);
959 return ip6_del_rt(rt, nlh, _rtattr);
962 read_unlock_bh(&rt6_lock);
/* Process an ICMPv6 redirect: validate that it plausibly came from our
 * current nexthop toward @dest (per RFC 2461), then install a dynamic
 * /128 cache route pointing at the redirecting neighbour.
 * NOTE(review): several 'goto out' exits and the default-router rescan
 * label are on lines elided from this excerpt. */
970 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
971 struct neighbour *neigh, int on_link)
973 struct rt6_info *rt, *nrt;
975 /* Locate old route to this destination. */
976 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
981 if (neigh->dev != rt->rt6i_dev)
984 /* Redirect received -> path was valid.
985 Look, redirects are sent only in response to data packets,
986 so that this nexthop apparently is reachable. --ANK
988 dst_confirm(&rt->u.dst);
990 /* Duplicate redirect: silently ignore. */
991 if (neigh == rt->u.dst.neighbour)
994 /* Current route is on-link; redirect is always invalid.
996 Seems, previous statement is not true. It could
997 be node, which looks for us as on-link (f.e. proxy ndisc)
998 But then router serving it might decide, that we should
999 know truth 8)8) --ANK (980726).
1001 if (!(rt->rt6i_flags&RTF_GATEWAY))
1005 * RFC 2461 specifies that redirects should only be
1006 * accepted if they come from the nexthop to the target.
1007 * Due to the way default routers are chosen, this notion
1008 * is a bit fuzzy and one might need to check all default
1012 if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
1013 if (rt->rt6i_flags & RTF_DEFAULT) {
/* the sender may be another of our default routers: rescan */
1014 struct rt6_info *rt1;
1016 read_lock(&rt6_lock);
1017 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1018 if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
1019 dst_hold(&rt1->u.dst);
1020 dst_release(&rt->u.dst);
1021 read_unlock(&rt6_lock);
1026 read_unlock(&rt6_lock);
1028 if (net_ratelimit())
1029 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1030 "for redirect target\n");
1037 * We have finally decided to accept it.
1040 nrt = ip6_rt_copy(rt);
1044 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1046 nrt->rt6i_flags &= ~RTF_GATEWAY;
1048 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1049 nrt->rt6i_dst.plen = 128;
1050 nrt->u.dst.flags |= DST_HOST;
1052 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1053 nrt->rt6i_nexthop = neigh_clone(neigh);
1054 /* Reset pmtu, it may be better */
1055 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1056 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
1058 if (rt6_ins(nrt, NULL, NULL))
/* the old cached route is superseded: remove it */
1061 if (rt->rt6i_flags&RTF_CACHE) {
1062 ip6_del_rt(rt, NULL, NULL);
1067 dst_release(&rt->u.dst);
1072 * Handle ICMP "packet too big" messages
1073 * i.e. Path MTU discovery
/* Record a smaller path MTU learned from a Packet Too Big message.
 * Existing cache entries are updated in place; otherwise a per-host
 * clone (COW for connected routes, copy for gatewayed/NONEXTHOP ones)
 * carries the reduced MTU with an expiry so increases are re-detected.
 * NOTE(review): 'goto out' exits and the else-branch framing are on
 * elided lines. */
1076 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1077 struct net_device *dev, u32 pmtu)
1079 struct rt6_info *rt, *nrt;
1081 if (pmtu < IPV6_MIN_MTU) {
1082 if (net_ratelimit())
1083 printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1085 /* According to RFC1981, the PMTU is set to the IPv6 minimum
1086 link MTU if the node receives a Packet Too Big message
1087 reporting next-hop MTU that is less than the IPv6 minimum MTU.
1089 pmtu = IPV6_MIN_MTU;
1092 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
/* only ever shrink the recorded path MTU */
1097 if (pmtu >= dst_pmtu(&rt->u.dst))
1100 /* New mtu received -> path was valid.
1101 They are sent only in response to data packets,
1102 so that this nexthop apparently is reachable. --ANK
1104 dst_confirm(&rt->u.dst);
1106 /* Host route. If it is static, it would be better
1107 not to override it, but add new one, so that
1108 when cache entry will expire old pmtu
1109 would return automatically.
1111 if (rt->rt6i_flags & RTF_CACHE) {
1112 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1113 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1114 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1119 Two cases are possible:
1120 1. It is connected route. Action: COW
1121 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1123 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1124 nrt = rt6_cow(rt, daddr, saddr);
1125 if (!nrt->u.dst.error) {
1126 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1127 /* According to RFC 1981, detecting PMTU increase shouldn't be
1128 happened within 5 mins, the recommended timer is 10 mins.
1129 Here this route expiration time is set to ip6_rt_mtu_expires
1130 which is 10 mins. After 10 mins the decreased pmtu is expired
1131 and detecting PMTU increase will be automatically happened.
1133 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1134 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1136 dst_release(&nrt->u.dst);
1138 nrt = ip6_rt_copy(rt);
1141 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1142 nrt->rt6i_dst.plen = 128;
1143 nrt->u.dst.flags |= DST_HOST;
1144 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1145 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1146 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1147 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1148 rt6_ins(nrt, NULL, NULL);
1152 dst_release(&rt->u.dst);
1156 * Misc support functions
/* Copy the forwarding-relevant fields of @ort into a freshly allocated
 * rt6_info (hooks, metrics, device + idev references, gateway, keys).
 * The clone starts unexpired with metric 0 and RTF_EXPIRES cleared.
 * NOTE(review): the allocation NULL-check, hold guards and the final
 * return are on elided lines. */
1159 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1161 struct rt6_info *rt = ip6_dst_alloc();
1164 rt->u.dst.input = ort->u.dst.input;
1165 rt->u.dst.output = ort->u.dst.output;
1167 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1168 rt->u.dst.dev = ort->u.dst.dev;
1170 dev_hold(rt->u.dst.dev);
1171 rt->rt6i_idev = ort->rt6i_idev;
1173 in6_dev_hold(rt->rt6i_idev);
1174 rt->u.dst.lastuse = jiffies;
1175 rt->rt6i_expires = 0;
1177 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1178 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1179 rt->rt6i_metric = 0;
1181 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1182 #ifdef CONFIG_IPV6_SUBTREES
1183 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
/* Find an existing default route through (addr, dev) in the root leaf
 * chain and return it with a reference held.  NOTE(review): the
 * RTF_ADDRCONF filter, loop break and the NULL return path are on
 * elided lines. */
1189 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1191 struct rt6_info *rt;
1192 struct fib6_node *fn;
1194 fn = &ip6_routing_table;
1196 write_lock_bh(&rt6_lock);
1197 for (rt = fn->leaf; rt; rt=rt->u.next) {
1198 if (dev == rt->rt6i_dev &&
1199 ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1203 dst_hold(&rt->u.dst);
1204 write_unlock_bh(&rt6_lock);
1208 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1209 struct net_device *dev)
1211 struct in6_rtmsg rtmsg;
1213 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1214 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1215 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1216 rtmsg.rtmsg_metric = 1024;
1217 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1219 rtmsg.rtmsg_ifindex = dev->ifindex;
1221 ip6_route_add(&rtmsg, NULL, NULL);
1222 return rt6_get_dflt_router(gwaddr, dev);
/* Delete every RA-learned default route (or, with @last_resort, every
 * RTF_ALLONLINK route).  Because rt6_lock must be dropped for each
 * deletion, the scan restarts afterwards.  NOTE(review): the restart
 * goto and the 'flags' declaration/branch framing are on elided lines. */
1225 void rt6_purge_dflt_routers(int last_resort)
1227 struct rt6_info *rt;
1231 flags = RTF_ALLONLINK;
1233 flags = RTF_DEFAULT | RTF_ADDRCONF;
1236 read_lock_bh(&rt6_lock);
1237 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1238 if (rt->rt6i_flags & flags) {
1239 dst_hold(&rt->u.dst);
/* invalidate the cached selection before deleting */
1241 spin_lock_bh(&rt6_dflt_lock);
1242 rt6_dflt_pointer = NULL;
1243 spin_unlock_bh(&rt6_dflt_lock);
1245 read_unlock_bh(&rt6_lock);
1247 ip6_del_rt(rt, NULL, NULL);
1252 read_unlock_bh(&rt6_lock);
/* SIOCADDRT/SIOCDELRT ioctl entry point: requires CAP_NET_ADMIN, copies
 * the in6_rtmsg from userspace and dispatches to ip6_route_add/del.
 * NOTE(review): the rtnl locking, copy_from_user error handling,
 * switch framing and returns are on elided lines. */
1255 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1257 struct in6_rtmsg rtmsg;
1261 case SIOCADDRT: /* Add a route */
1262 case SIOCDELRT: /* Delete a route */
1263 if (!capable(CAP_NET_ADMIN))
1265 err = copy_from_user(&rtmsg, arg,
1266 sizeof(struct in6_rtmsg));
1273 err = ip6_route_add(&rtmsg, NULL, NULL);
1276 err = ip6_route_del(&rtmsg, NULL, NULL);
1290 * Drop the packet on the floor
1293 int ip6_pkt_discard(struct sk_buff *skb)
1295 IP6_INC_STATS(OutNoRoutes);
1296 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
/* Output-hook flavour of the discard path: unwrap the skb pointer and
 * delegate to ip6_pkt_discard(). */
int ip6_pkt_discard_out(struct sk_buff **pskb)
{
	return ip6_pkt_discard(*pskb);
}
/* Install a local host route (via loopback) for an address configured
 * on @dev; with @anycast the RTF_LOCAL flag is not set.  NOTE(review):
 * the allocation NULL-check, the anycast branch framing and the error
 * returns are on elided lines. */
1310 int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast)
1312 struct rt6_info *rt = ip6_dst_alloc();
1317 dev_hold(&loopback_dev);
1319 rt->u.dst.flags = DST_HOST;
1320 rt->u.dst.input = ip6_input;
1321 rt->u.dst.output = ip6_output;
/* local addresses are always delivered through loopback */
1322 rt->rt6i_dev = &loopback_dev;
1323 rt->rt6i_idev = in6_dev_get(&loopback_dev);
1324 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1325 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
1326 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
1327 rt->u.dst.obsolete = -1;
1329 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1331 rt->rt6i_flags |= RTF_LOCAL;
1332 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1333 if (rt->rt6i_nexthop == NULL) {
1334 dst_free((struct dst_entry *) rt);
1338 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1339 rt->rt6i_dst.plen = 128;
1340 rt6_ins(rt, NULL, NULL);
1345 /* Delete address. Warning: you should check that this address
1346 disappeared before calling this function.
/* Remove the loopback host route previously installed for a local
 * address.  NOTE(review): the NULL-check on the lookup result and the
 * return are on elided lines. */
1349 int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
1351 struct rt6_info *rt;
1354 rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
/* only delete an exact host (/128) match */
1356 if (rt->rt6i_dst.plen == 128)
1357 err = ip6_del_rt(rt, NULL, NULL);
1359 dst_release(&rt->u.dst);
/* fib6_clean_tree() callback for rt6_ifdown(): select every route on
 * the device being taken down (or all routes when arg == NULL), except
 * the null entry.  NOTE(review): the delete/keep return values are on
 * elided lines. */
1365 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1367 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1368 rt != &ip6_null_entry) {
1369 RT6_TRACE("deleted by ifdown %p\n", rt);
1375 void rt6_ifdown(struct net_device *dev)
1377 write_lock_bh(&rt6_lock);
1378 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1379 write_unlock_bh(&rt6_lock);
/* Argument bundle threaded through fib6_clean_tree() by
 * rt6_mtu_change().  NOTE(review): the mtu member is declared on a line
 * elided from this excerpt. */
1382 struct rt6_mtu_change_arg
1384 struct net_device *dev;
/* Per-route callback for rt6_mtu_change(): update a route's cached
 * RTAX_MTU when the underlying device MTU changed, covering both
 * decreases and administrative increases.  NOTE(review): in this
 * excerpt the final ADVMSS assignment appears to run unconditionally
 * after the multi-line if(); confirm the bracing against the unelided
 * source, as recomputing ADVMSS for unrelated routes looks suspicious. */
1388 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1390 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1391 struct inet6_dev *idev;
1393 /* In IPv6 pmtu discovery is not optional,
1394 so that RTAX_MTU lock cannot disable it.
1395 We still use this lock to block changes
1396 caused by addrconf/ndisc.
1399 idev = __in6_dev_get(arg->dev);
1403 /* For administrative MTU increase, there is no way to discover
1404 IPv6 PMTU increase, so PMTU increase should be updated here.
1405 Since RFC 1981 doesn't include administrative MTU increase
1406 update PMTU increase is a MUST. (i.e. jumbo frame)
1409 If new MTU is less than route PMTU, this new MTU will be the
1410 lowest MTU in the path, update the route PMTU to reflect PMTU
1411 decreases; if new MTU is greater than route PMTU, and the
1412 old MTU is the lowest MTU in the path, update the route PMTU
1413 to reflect the increase. In this case if the other nodes' MTU
1414 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1417 if (rt->rt6i_dev == arg->dev &&
1418 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1419 (dst_pmtu(&rt->u.dst) > arg->mtu ||
1420 (dst_pmtu(&rt->u.dst) < arg->mtu &&
1421 dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
1422 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1423 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
/* Propagate a device MTU change to every route over that device via
 * fib6_clean_tree().  NOTE(review): the arg.dev / arg.mtu assignments
 * are on lines elided from this excerpt. */
1427 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1429 struct rt6_mtu_change_arg arg;
1433 read_lock_bh(&rt6_lock);
1434 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1435 read_unlock_bh(&rt6_lock);
/* Translate an rtnetlink rtmsg plus parsed attribute vector into the
 * legacy in6_rtmsg consumed by ip6_route_add/del, validating each
 * attribute's length.  NOTE(review): the -EINVAL returns after the
 * length checks and the final success return are on elided lines. */
1438 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1439 struct in6_rtmsg *rtmsg)
1441 memset(rtmsg, 0, sizeof(*rtmsg));
1443 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1444 rtmsg->rtmsg_src_len = r->rtm_src_len;
1445 rtmsg->rtmsg_flags = RTF_UP;
1446 if (r->rtm_type == RTN_UNREACHABLE)
1447 rtmsg->rtmsg_flags |= RTF_REJECT;
1449 if (rta[RTA_GATEWAY-1]) {
1450 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1452 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1453 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1455 if (rta[RTA_DST-1]) {
1456 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1458 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1460 if (rta[RTA_SRC-1]) {
1461 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1463 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1465 if (rta[RTA_OIF-1]) {
1466 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1468 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1470 if (rta[RTA_PRIORITY-1]) {
1471 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1473 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
/* rtnetlink RTM_DELROUTE handler: convert the message to an in6_rtmsg
 * and delegate to ip6_route_del().  NOTE(review): the error return for
 * a failed conversion is on an elided line. */
1478 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1480 struct rtmsg *r = NLMSG_DATA(nlh);
1481 struct in6_rtmsg rtmsg;
1483 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1485 return ip6_route_del(&rtmsg, nlh, arg);
/* rtnetlink RTM_NEWROUTE handler: convert the message to an in6_rtmsg
 * and delegate to ip6_route_add().  NOTE(review): the error return for
 * a failed conversion is on an elided line. */
1488 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1490 struct rtmsg *r = NLMSG_DATA(nlh);
1491 struct in6_rtmsg rtmsg;
1493 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1495 return ip6_route_add(&rtmsg, nlh, arg);
/* Context carried through the tree walk when dumping routes to a
 * netlink client.  NOTE(review): any further members are on elided
 * lines. */
1498 struct rt6_rtnl_dump_arg
1500 struct sk_buff *skb;
1501 struct netlink_callback *cb;
/* Serialize one rt6_info into an rtnetlink route message in @skb; used
 * both for dumps and for event notifications.  On attribute overflow
 * the partially written message is trimmed away.  NOTE(review): several
 * condition headers (the dst/src/iif argument tests, the expiry else
 * branch) and the nlmsg_failure/rtattr_failure framing are on elided
 * lines. */
1504 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1505 struct in6_addr *dst,
1506 struct in6_addr *src,
1508 int type, u32 pid, u32 seq,
1509 struct nlmsghdr *in_nlh, int prefix)
1512 struct nlmsghdr *nlh;
/* remember the tail so a failed fill can be trimmed back out */
1513 unsigned char *b = skb->tail;
1514 struct rta_cacheinfo ci;
1516 if (prefix) { /* user wants prefix routes only */
1517 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1518 /* success since this is not a prefix route */
1523 if (!pid && in_nlh) {
1524 pid = in_nlh->nlmsg_pid;
1527 nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1528 rtm = NLMSG_DATA(nlh);
1529 rtm->rtm_family = AF_INET6;
1530 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1531 rtm->rtm_src_len = rt->rt6i_src.plen;
1533 rtm->rtm_table = RT_TABLE_MAIN;
1534 if (rt->rt6i_flags&RTF_REJECT)
1535 rtm->rtm_type = RTN_UNREACHABLE;
1536 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1537 rtm->rtm_type = RTN_LOCAL;
1539 rtm->rtm_type = RTN_UNICAST;
1541 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1542 rtm->rtm_protocol = rt->rt6i_protocol;
1543 if (rt->rt6i_flags&RTF_DYNAMIC)
1544 rtm->rtm_protocol = RTPROT_REDIRECT;
1545 else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1546 rtm->rtm_protocol = RTPROT_KERNEL;
1547 else if (rt->rt6i_flags&RTF_DEFAULT)
1548 rtm->rtm_protocol = RTPROT_RA;
1550 if (rt->rt6i_flags&RTF_CACHE)
1551 rtm->rtm_flags |= RTM_F_CLONED;
1554 RTA_PUT(skb, RTA_DST, 16, dst);
1555 rtm->rtm_dst_len = 128;
1556 } else if (rtm->rtm_dst_len)
1557 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1558 #ifdef CONFIG_IPV6_SUBTREES
1560 RTA_PUT(skb, RTA_SRC, 16, src);
1561 rtm->rtm_src_len = 128;
1562 } else if (rtm->rtm_src_len)
1563 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1566 RTA_PUT(skb, RTA_IIF, 4, &iif);
1568 struct in6_addr saddr_buf;
1569 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1570 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1572 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1573 goto rtattr_failure;
1574 if (rt->u.dst.neighbour)
1575 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1577 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1578 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1579 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1580 if (rt->rt6i_expires)
1581 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1584 ci.rta_used = rt->u.dst.__use;
1585 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1586 ci.rta_error = rt->u.dst.error;
1590 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1591 nlh->nlmsg_len = skb->tail - b;
/* failure path: discard everything written since 'b' */
1596 skb_trim(skb, b - skb->data);
1600 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1602 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1605 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1606 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1607 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1611 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1612 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1616 static int fib6_dump_node(struct fib6_walker_t *w)
1619 struct rt6_info *rt;
1621 for (rt = w->leaf; rt; rt = rt->u.next) {
1622 res = rt6_dump_route(rt, w->args);
1624 /* Frame is full, suspend walking */
1634 static void fib6_dump_end(struct netlink_callback *cb)
1636 struct fib6_walker_t *w = (void*)cb->args[0];
1640 fib6_walker_unlink(w);
1644 cb->done = (void*)cb->args[1];
1649 static int fib6_dump_done(struct netlink_callback *cb)
1652 return cb->done(cb);
1655 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1657 struct rt6_rtnl_dump_arg arg;
1658 struct fib6_walker_t *w;
1664 w = (void*)cb->args[0];
1668 * 1. hook callback destructor.
1670 cb->args[1] = (long)cb->done;
1671 cb->done = fib6_dump_done;
1674 * 2. allocate and initialize walker.
1676 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1679 RT6_TRACE("dump<%p", w);
1680 memset(w, 0, sizeof(*w));
1681 w->root = &ip6_routing_table;
1682 w->func = fib6_dump_node;
1684 cb->args[0] = (long)w;
1685 read_lock_bh(&rt6_lock);
1687 read_unlock_bh(&rt6_lock);
1690 read_lock_bh(&rt6_lock);
1691 res = fib6_walk_continue(w);
1692 read_unlock_bh(&rt6_lock);
1695 if (res <= 0 && skb->len == 0)
1696 RT6_TRACE("%p>dump end\n", w);
1698 res = res < 0 ? res : skb->len;
1699 /* res < 0 is an error. (really, impossible)
1700 res == 0 means that dump is complete, but skb still can contain data.
1701 res > 0 dump is not complete, but frame is full.
1703 /* Destroy walker, if dump of this table is complete. */
1709 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1711 struct rtattr **rta = arg;
1714 struct sk_buff *skb;
1716 struct rt6_info *rt;
1718 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1722 /* Reserve room for dummy headers, this skb can pass
1723 through good chunk of routing engine.
1725 skb->mac.raw = skb->data;
1726 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1728 memset(&fl, 0, sizeof(fl));
1730 ipv6_addr_copy(&fl.fl6_src,
1731 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1733 ipv6_addr_copy(&fl.fl6_dst,
1734 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1737 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1740 struct net_device *dev;
1741 dev = __dev_get_by_index(iif);
1750 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1752 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1754 skb->dst = &rt->u.dst;
1756 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1757 err = rt6_fill_node(skb, rt,
1758 &fl.fl6_dst, &fl.fl6_src,
1760 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1761 nlh->nlmsg_seq, nlh, 0);
1767 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1777 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1779 struct sk_buff *skb;
1780 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1782 skb = alloc_skb(size, gfp_any());
1784 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1787 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1789 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1792 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1793 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1800 #ifdef CONFIG_PROC_FS
1802 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
1813 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1815 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1818 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1823 if (arg->len >= arg->length)
1826 for (i=0; i<16; i++) {
1827 sprintf(arg->buffer + arg->len, "%02x",
1828 rt->rt6i_dst.addr.s6_addr[i]);
1831 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1834 #ifdef CONFIG_IPV6_SUBTREES
1835 for (i=0; i<16; i++) {
1836 sprintf(arg->buffer + arg->len, "%02x",
1837 rt->rt6i_src.addr.s6_addr[i]);
1840 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1843 sprintf(arg->buffer + arg->len,
1844 "00000000000000000000000000000000 00 ");
1848 if (rt->rt6i_nexthop) {
1849 for (i=0; i<16; i++) {
1850 sprintf(arg->buffer + arg->len, "%02x",
1851 rt->rt6i_nexthop->primary_key[i]);
1855 sprintf(arg->buffer + arg->len,
1856 "00000000000000000000000000000000");
1859 arg->len += sprintf(arg->buffer + arg->len,
1860 " %08x %08x %08x %08x %8s\n",
1861 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1862 rt->u.dst.__use, rt->rt6i_flags,
1863 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1867 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1869 struct rt6_proc_arg arg;
1870 arg.buffer = buffer;
1871 arg.offset = offset;
1872 arg.length = length;
1876 read_lock_bh(&rt6_lock);
1877 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1878 read_unlock_bh(&rt6_lock);
1882 *start += offset % RT6_INFO_LEN;
1884 arg.len -= offset % RT6_INFO_LEN;
1886 if (arg.len > length)
1894 extern struct rt6_statistics rt6_stats;
1896 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1898 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1899 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1900 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1901 rt6_stats.fib_rt_cache,
1902 atomic_read(&ip6_dst_ops.entries),
1903 rt6_stats.fib_discarded_routes);
1908 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1910 return single_open(file, rt6_stats_seq_show, NULL);
1913 static struct file_operations rt6_stats_seq_fops = {
1914 .owner = THIS_MODULE,
1915 .open = rt6_stats_seq_open,
1917 .llseek = seq_lseek,
1918 .release = single_release,
1920 #endif /* CONFIG_PROC_FS */
1922 #ifdef CONFIG_SYSCTL
1924 static int flush_delay;
1927 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1928 void __user *buffer, size_t *lenp)
1931 proc_dointvec(ctl, write, filp, buffer, lenp);
1932 if (flush_delay < 0)
1934 fib6_run_gc((unsigned long)flush_delay);
1940 ctl_table ipv6_route_table[] = {
1942 .ctl_name = NET_IPV6_ROUTE_FLUSH,
1943 .procname = "flush",
1944 .data = &flush_delay,
1945 .maxlen = sizeof(int),
1947 .proc_handler = &ipv6_sysctl_rtcache_flush
1950 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
1951 .procname = "gc_thresh",
1952 .data = &ip6_dst_ops.gc_thresh,
1953 .maxlen = sizeof(int),
1955 .proc_handler = &proc_dointvec,
1958 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
1959 .procname = "max_size",
1960 .data = &ip6_rt_max_size,
1961 .maxlen = sizeof(int),
1963 .proc_handler = &proc_dointvec,
1966 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
1967 .procname = "gc_min_interval",
1968 .data = &ip6_rt_gc_min_interval,
1969 .maxlen = sizeof(int),
1971 .proc_handler = &proc_dointvec_jiffies,
1972 .strategy = &sysctl_jiffies,
1975 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
1976 .procname = "gc_timeout",
1977 .data = &ip6_rt_gc_timeout,
1978 .maxlen = sizeof(int),
1980 .proc_handler = &proc_dointvec_jiffies,
1981 .strategy = &sysctl_jiffies,
1984 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
1985 .procname = "gc_interval",
1986 .data = &ip6_rt_gc_interval,
1987 .maxlen = sizeof(int),
1989 .proc_handler = &proc_dointvec_jiffies,
1990 .strategy = &sysctl_jiffies,
1993 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
1994 .procname = "gc_elasticity",
1995 .data = &ip6_rt_gc_elasticity,
1996 .maxlen = sizeof(int),
1998 .proc_handler = &proc_dointvec_jiffies,
1999 .strategy = &sysctl_jiffies,
2002 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2003 .procname = "mtu_expires",
2004 .data = &ip6_rt_mtu_expires,
2005 .maxlen = sizeof(int),
2007 .proc_handler = &proc_dointvec_jiffies,
2008 .strategy = &sysctl_jiffies,
2011 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2012 .procname = "min_adv_mss",
2013 .data = &ip6_rt_min_advmss,
2014 .maxlen = sizeof(int),
2016 .proc_handler = &proc_dointvec_jiffies,
2017 .strategy = &sysctl_jiffies,
2024 void __init ip6_route_init(void)
2026 struct proc_dir_entry *p;
2028 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2029 sizeof(struct rt6_info),
2030 0, SLAB_HWCACHE_ALIGN,
2032 if (!ip6_dst_ops.kmem_cachep)
2033 panic("cannot create ip6_dst_cache");
2036 #ifdef CONFIG_PROC_FS
2037 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2039 p->owner = THIS_MODULE;
2041 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2048 void __exit ip6_route_cleanup(void)
2050 #ifdef CONFIG_PROC_FS
2051 proc_net_remove("ipv6_route");
2052 proc_net_remove("rt6_stats");
2059 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);