2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IPv4 Forwarding Information Base: semantics.
8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <asm/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/jiffies.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
44 #include <net/ip_fib.h>
/* Debug printf stub: compiled out in this configuration. */
46 #define FSprintk(a...)
/*
 * Global singly/doubly linked list of fib_info records, protected by
 * fib_info_lock (rwlock: readers walk it, writers link/unlink).
 * NOTE(review): the embedded numbering in this listing jumps throughout;
 * interior lines appear to be missing from most definitions below.
 */
48 static struct fib_info *fib_info_list;
49 static rwlock_t fib_info_lock = RW_LOCK_UNLOCKED;
/*
 * Brace-pairing iteration macros: for_fib_info() opens a scope and loops
 * over every fib_info on the global list; endfor_fib_info() closes it.
 */
52 #define for_fib_info() { struct fib_info *fi; \
53 for (fi = fib_info_list; fi; fi = fi->fib_next)
55 #define endfor_fib_info() }
57 #ifdef CONFIG_IP_ROUTE_MULTIPATH
59 static spinlock_t fib_multipath_lock = SPIN_LOCK_UNLOCKED;
/*
 * Walk all nexthops of a fib_info: for_nexthops() is the read-only
 * variant (const nh), change_nexthops() the mutable one.  Both declare
 * nhsel/nh in a new scope closed by endfor_nexthops().
 */
61 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
62 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
64 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
65 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
67 #else /* CONFIG_IP_ROUTE_MULTIPATH */
69 /* Hope, that gcc will optimize it to get rid of dummy loop */
/* Without multipath there is exactly one nexthop, so loop once. */
71 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
72 for (nhsel=0; nhsel < 1; nhsel++)
74 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
75 for (nhsel=0; nhsel < 1; nhsel++)
77 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
79 #define endfor_nexthops(fi) }
/*
 * Per-route-type properties table, indexed by RTN_* type: the default
 * scope for each type and, for reject-style types (see the explicit
 * -EHOSTUNREACH for RTN_UNREACHABLE), the errno handed back to lookups.
 * NOTE(review): the struct definition and most initializer fields are
 * missing from this listing (embedded numbering jumps); fragment only.
 */
86 } fib_props[RTA_MAX + 1] = {
89 .scope = RT_SCOPE_NOWHERE,
93 .scope = RT_SCOPE_UNIVERSE,
97 .scope = RT_SCOPE_HOST,
101 .scope = RT_SCOPE_LINK,
102 }, /* RTN_BROADCAST */
105 .scope = RT_SCOPE_LINK,
109 .scope = RT_SCOPE_UNIVERSE,
110 }, /* RTN_MULTICAST */
113 .scope = RT_SCOPE_UNIVERSE,
114 }, /* RTN_BLACKHOLE */
116 .error = -EHOSTUNREACH,
117 .scope = RT_SCOPE_UNIVERSE,
118 }, /* RTN_UNREACHABLE */
121 .scope = RT_SCOPE_UNIVERSE,
122 }, /* RTN_PROHIBIT */
125 .scope = RT_SCOPE_UNIVERSE,
127 #ifdef CONFIG_IP_ROUTE_NAT
130 .scope = RT_SCOPE_HOST,
135 .scope = RT_SCOPE_NOWHERE,
140 .scope = RT_SCOPE_NOWHERE,
141 }, /* RTN_XRESOLVE */
145 /* Release a nexthop info record */
/*
 * free_fib_info() - destroy a fib_info record.
 *
 * Complains if the entry has not been marked dead (fi->fib_dead) first;
 * the change_nexthops() walk is where the full source drops each
 * nexthop's device reference before the structure itself is freed.
 * NOTE(review): interior lines are missing from this listing (embedded
 * numbering jumps 150 -> 153 -> 157); fragment only.
 */
147 void free_fib_info(struct fib_info *fi)
149 if (fi->fib_dead == 0) {
150 printk("Freeing alive fib_info %p\n", fi);
153 change_nexthops(fi) {
157 } endfor_nexthops(fi);
/*
 * fib_release_info() - drop one tree reference on @fi; when the count
 * hits zero, unlink the entry from the doubly-linked fib_info_list
 * under write_lock(&fib_info_lock) (the full source then marks the
 * record dead before it is eventually freed).
 * NOTE(review): interior lines are missing from this listing; fragment.
 */
162 void fib_release_info(struct fib_info *fi)
164 write_lock(&fib_info_lock);
165 if (fi && --fi->fib_treeref == 0) {
167 fi->fib_next->fib_prev = fi->fib_prev;
169 fi->fib_prev->fib_next = fi->fib_next;
170 if (fi == fib_info_list)
171 fib_info_list = fi->fib_next;
175 write_unlock(&fib_info_lock);
/*
 * nh_comp() - compare the nexthop arrays of two fib_info records field
 * by field (output interface, gateway, scope, plus weight/tclassid when
 * those options are configured), ignoring only the RTNH_F_DEAD flag.
 * Used for fib_info deduplication (see fib_find_info()).
 * NOTE(review): the return statements and onh advance are on lines
 * missing from this listing; fragment only.
 */
178 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
180 const struct fib_nh *onh = ofi->fib_nh;
183 if (nh->nh_oif != onh->nh_oif ||
184 nh->nh_gw != onh->nh_gw ||
185 nh->nh_scope != onh->nh_scope ||
186 #ifdef CONFIG_IP_ROUTE_MULTIPATH
187 nh->nh_weight != onh->nh_weight ||
189 #ifdef CONFIG_NET_CLS_ROUTE
190 nh->nh_tclassid != onh->nh_tclassid ||
192 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
195 } endfor_nexthops(fi);
/*
 * fib_find_info() - search fib_info_list for an existing record
 * equivalent to @nfi: same protocol, preferred source, priority,
 * metrics, flags modulo RTNH_F_DEAD, and (when there are nexthops)
 * identical nexthops per nh_comp().  Lets routes share one fib_info.
 * NOTE(review): the list-walk wrapper and returns are on lines missing
 * from this listing; fragment only.
 */
199 static __inline__ struct fib_info * fib_find_info(const struct fib_info *nfi)
202 if (fi->fib_nhs != nfi->fib_nhs)
204 if (nfi->fib_protocol == fi->fib_protocol &&
205 nfi->fib_prefsrc == fi->fib_prefsrc &&
206 nfi->fib_priority == fi->fib_priority &&
207 memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 &&
208 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
209 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
215 /* Check, that the gateway is already configured.
216 Used only by redirect accept routine.
/*
 * Walks fib_info_list under read_lock(&fib_info_lock), skipping entries
 * flagged RTNH_F_DEAD, and succeeds when some live nexthop matches the
 * (dev, gw) pair with RT_SCOPE_LINK scope.
 * NOTE(review): the success/failure return values are on lines missing
 * from this listing; fragment only.
 */
219 int ip_fib_check_default(u32 gw, struct net_device *dev)
221 read_lock(&fib_info_lock);
223 if (fi->fib_flags & RTNH_F_DEAD)
226 if (nh->nh_dev == dev && nh->nh_gw == gw &&
227 nh->nh_scope == RT_SCOPE_LINK &&
228 !(nh->nh_flags&RTNH_F_DEAD)) {
229 read_unlock(&fib_info_lock);
232 } endfor_nexthops(fi);
234 read_unlock(&fib_info_lock);
238 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/*
 * fib_get_attr32() - scan a chain of rtattrs for an attribute of @type
 * and return its 32-bit payload; presumably returns 0 when not found
 * (the not-found return is on a line missing from this listing).
 */
240 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
242 while (RTA_OK(attr,attrlen)) {
243 if (attr->rta_type == type)
244 return *(u32*)RTA_DATA(attr);
245 attr = RTA_NEXT(attr, attrlen);
/*
 * fib_count_nexthops() - count the rtnexthop entries packed inside an
 * RTA_MULTIPATH attribute, validating along the way that each
 * rtnh_len fits within the remaining payload.
 * NOTE(review): the return type line, the counter increment, and the
 * final return are missing from this listing; fragment only.
 */
251 fib_count_nexthops(struct rtattr *rta)
254 struct rtnexthop *nhp = RTA_DATA(rta);
255 int nhlen = RTA_PAYLOAD(rta);
257 while (nhlen >= (int)sizeof(struct rtnexthop)) {
258 if ((nhlen -= nhp->rtnh_len) < 0)
261 nhp = RTNH_NEXT(nhp);
/*
 * fib_get_nhs() - populate fi->fib_nh[] from the rtnexthop entries of
 * an RTA_MULTIPATH attribute: per-nexthop flags (merged with the upper
 * bits of rtm_flags), output interface, weight (rtnh_hops + 1), and the
 * nested RTA_GATEWAY / RTA_FLOW sub-attributes via fib_get_attr32().
 * NOTE(review): the return type line and error returns are missing from
 * this listing; fragment only.
 */
267 fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
269 struct rtnexthop *nhp = RTA_DATA(rta);
270 int nhlen = RTA_PAYLOAD(rta);
272 change_nexthops(fi) {
273 int attrlen = nhlen - sizeof(struct rtnexthop);
274 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
276 nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
277 nh->nh_oif = nhp->rtnh_ifindex;
278 nh->nh_weight = nhp->rtnh_hops + 1;
280 nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
281 #ifdef CONFIG_NET_CLS_ROUTE
282 nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
285 nhp = RTNH_NEXT(nhp);
286 } endfor_nexthops(fi);
/*
 * fib_nh_match() - decide whether a netlink delete/replace request
 * (rtmsg + kern_rta) matches an existing fib_info: first priority,
 * then either the single-nexthop oif/gateway pair, or — with multipath
 * configured — every rtnexthop entry of rta_mp (ifindex, RTA_GATEWAY,
 * and RTA_FLOW/tclassid) against the stored nexthop array.
 * NOTE(review): the match/mismatch return statements are on lines
 * missing from this listing; fragment only.
 */
292 int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
295 #ifdef CONFIG_IP_ROUTE_MULTIPATH
296 struct rtnexthop *nhp;
300 if (rta->rta_priority &&
301 *rta->rta_priority != fi->fib_priority)
304 if (rta->rta_oif || rta->rta_gw) {
305 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
306 (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
311 #ifdef CONFIG_IP_ROUTE_MULTIPATH
312 if (rta->rta_mp == NULL)
314 nhp = RTA_DATA(rta->rta_mp);
315 nhlen = RTA_PAYLOAD(rta->rta_mp);
/* Compare each requested rtnexthop against the stored nexthops. */
318 int attrlen = nhlen - sizeof(struct rtnexthop);
321 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
323 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
326 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
327 if (gw && gw != nh->nh_gw)
329 #ifdef CONFIG_NET_CLS_ROUTE
330 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
331 if (gw && gw != nh->nh_tclassid)
335 nhp = RTNH_NEXT(nhp);
336 } endfor_nexthops(fi);
346 Semantics of nexthop is very messy for historical reasons.
347 We have to take into account, that:
348 a) gateway can be actually local interface address,
349 so that gatewayed route is direct.
350 b) gateway must be on-link address, possibly
351 described not by an ifaddr, but also by a direct route.
352 c) If both gateway and interface are specified, they should not
354 d) If we use tunnel routes, gateway could be not on-link.
356 Attempt to reconcile all of these (alas, self-contradictory) conditions
357 results in pretty ugly and hairy code with obscure logic.
359 I chose to generalize it instead, so that the size
360 of code does not increase practically, but it becomes
362 Every prefix is assigned a "scope" value: "host" is local address,
363 "link" is direct route,
364 [ ... "site" ... "interior" ... ]
365 and "universe" is true gateway route with global meaning.
367 Every prefix refers to a set of "nexthop"s (gw, oif),
368 where gw must have narrower scope. This recursion stops
369 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
370 which means that gw is forced to be on link.
372 Code is still hairy, but now it is apparently logically
373 consistent and very flexible. F.e. as by-product it allows
374 to co-exist in peace independent exterior and interior
377 Normally it looks as following.
379 {universe prefix} -> (gw, oif) [scope link]
381 |-> {link prefix} -> (gw, oif) [scope local]
383 |-> {local prefix} (terminal node)
/*
 * fib_check_nh() - validate and resolve one nexthop of a route being
 * added (see the long design comment above).  With RTNH_F_ONLINK the
 * gateway is forced to be directly reachable on nh_oif (which must name
 * an UP device and a unicast gateway, and the route scope must be wider
 * than link).  Otherwise the gateway is resolved via fib_lookup() at a
 * scope strictly narrower than the route's (r->rtm_scope + 1, clamped to
 * at least RT_SCOPE_LINK), and the nexthop inherits the resulting scope,
 * oif and device.  A gateway-less nexthop just needs an UP device and
 * gets RT_SCOPE_HOST.
 * NOTE(review): error returns and several branches are on lines missing
 * from this listing; fragment only.
 */
386 static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
391 struct fib_result res;
393 #ifdef CONFIG_IP_ROUTE_PERVASIVE
394 if (nh->nh_flags&RTNH_F_PERVASIVE)
397 if (nh->nh_flags&RTNH_F_ONLINK) {
398 struct net_device *dev;
400 if (r->rtm_scope >= RT_SCOPE_LINK)
402 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
404 if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
406 if (!(dev->flags&IFF_UP))
410 nh->nh_scope = RT_SCOPE_LINK;
/* Resolve the gateway itself through the FIB at a narrower scope. */
414 struct flowi fl = { .nl_u = { .ip4_u =
415 { .daddr = nh->nh_gw,
416 .scope = r->rtm_scope + 1 } },
419 /* It is not necessary, but requires a bit of thinking */
420 if (fl.fl4_scope < RT_SCOPE_LINK)
421 fl.fl4_scope = RT_SCOPE_LINK;
422 if ((err = fib_lookup(&fl, &res)) != 0)
426 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
428 nh->nh_scope = res.scope;
429 nh->nh_oif = FIB_RES_OIF(res);
430 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
432 dev_hold(nh->nh_dev);
434 if (!(nh->nh_dev->flags & IFF_UP))
/* No gateway: only the output device needs to exist and be UP. */
441 struct in_device *in_dev;
443 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
446 in_dev = inetdev_by_index(nh->nh_oif);
449 if (!(in_dev->dev->flags&IFF_UP)) {
453 nh->nh_dev = in_dev->dev;
454 dev_hold(nh->nh_dev);
455 nh->nh_scope = RT_SCOPE_HOST;
/*
 * fib_create_info() - allocate and initialize a fib_info from a netlink
 * route request: protocol/flags/priority from the rtmsg, metrics from
 * RTA_METRICS, preferred source from rta_prefsrc, then the nexthop(s)
 * (multipath array via fib_get_nhs(), or a single nexthop from
 * rta_oif/rta_gw/rta_flow).  NAT and reject-type routes are
 * special-cased; other nexthops are validated by fib_check_nh().
 * Finally an equivalent existing record found by fib_find_info() is
 * reused, otherwise the new fib_info is linked at the head of
 * fib_info_list under write_lock(&fib_info_lock).
 * NOTE(review): this is a heavily truncated fragment — error paths,
 * several assignments, and the returns are on missing lines.
 */
462 fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
463 const struct nlmsghdr *nlh, int *errp)
466 struct fib_info *fi = NULL;
467 struct fib_info *ofi;
468 #ifdef CONFIG_IP_ROUTE_MULTIPATH
474 /* Fast check to catch the most weird cases */
475 if (fib_props[r->rtm_type].scope > r->rtm_scope)
478 #ifdef CONFIG_IP_ROUTE_MULTIPATH
480 nhs = fib_count_nexthops(rta->rta_mp);
/* fib_info and its nexthop array are a single allocation. */
486 fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
491 memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
493 fi->fib_protocol = r->rtm_protocol;
495 fi->fib_flags = r->rtm_flags;
496 if (rta->rta_priority)
497 fi->fib_priority = *rta->rta_priority;
/* Copy RTAX_* metrics out of the nested RTA_METRICS attribute. */
499 int attrlen = RTA_PAYLOAD(rta->rta_mx);
500 struct rtattr *attr = RTA_DATA(rta->rta_mx);
502 while (RTA_OK(attr, attrlen)) {
503 unsigned flavor = attr->rta_type;
505 if (flavor > RTAX_MAX)
507 fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
509 attr = RTA_NEXT(attr, attrlen);
512 if (rta->rta_prefsrc)
513 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
516 #ifdef CONFIG_IP_ROUTE_MULTIPATH
517 if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
519 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
521 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
523 #ifdef CONFIG_NET_CLS_ROUTE
524 if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
/* Single-nexthop case: fill the one struct fib_nh directly. */
531 struct fib_nh *nh = fi->fib_nh;
533 nh->nh_oif = *rta->rta_oif;
535 memcpy(&nh->nh_gw, rta->rta_gw, 4);
536 #ifdef CONFIG_NET_CLS_ROUTE
538 memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
540 nh->nh_flags = r->rtm_flags;
541 #ifdef CONFIG_IP_ROUTE_MULTIPATH
546 #ifdef CONFIG_IP_ROUTE_NAT
547 if (r->rtm_type == RTN_NAT) {
548 if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif)
550 memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 4);
/* Reject-type routes (unreachable/prohibit/...) carry no nexthops. */
555 if (fib_props[r->rtm_type].error) {
556 if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
561 if (r->rtm_scope > RT_SCOPE_HOST)
564 if (r->rtm_scope == RT_SCOPE_HOST) {
565 struct fib_nh *nh = fi->fib_nh;
567 /* Local address is added. */
568 if (nhs != 1 || nh->nh_gw)
570 nh->nh_scope = RT_SCOPE_NOWHERE;
571 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
573 if (nh->nh_dev == NULL)
576 change_nexthops(fi) {
577 if ((err = fib_check_nh(r, fi, nh)) != 0)
579 } endfor_nexthops(fi)
582 if (fi->fib_prefsrc) {
583 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
584 memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
585 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
/* Reuse an existing equivalent fib_info if one is already linked. */
590 if ((ofi = fib_find_info(fi)) != NULL) {
598 atomic_inc(&fi->fib_clntref);
599 write_lock(&fib_info_lock);
600 fi->fib_next = fib_info_list;
603 fib_info_list->fib_prev = fi;
605 write_unlock(&fib_info_lock);
/*
 * fib_semantic_match() - final check after a prefix match: start from
 * the route type's canned error (fib_props[type].error), skip fib_infos
 * flagged RTNH_F_DEAD, and pick the first live nexthop consistent with
 * flp->oif (with multipath, keep searching past dead ones).  On success
 * a clntref is taken on the fib_info for the caller's fib_result.
 * NOTE(review): the res-> field assignments and returns are on lines
 * missing from this listing; fragment only.
 */
621 fib_semantic_match(int type, struct fib_info *fi, const struct flowi *flp, struct fib_result *res)
623 int err = fib_props[type].error;
626 if (fi->fib_flags&RTNH_F_DEAD)
632 #ifdef CONFIG_IP_ROUTE_NAT
635 atomic_inc(&fi->fib_clntref);
644 if (nh->nh_flags&RTNH_F_DEAD)
646 if (!flp->oif || flp->oif == nh->nh_oif)
649 #ifdef CONFIG_IP_ROUTE_MULTIPATH
650 if (nhsel < fi->fib_nhs) {
652 atomic_inc(&fi->fib_clntref);
657 atomic_inc(&fi->fib_clntref);
/* Reaching here means a route type outside the known table. */
666 printk(KERN_DEBUG "impossible 102\n");
673 /* Find appropriate source address to this destination */
/*
 * Delegates to inet_select_addr() on the result's output device, keyed
 * by the gateway address and the route's scope.
 */
675 u32 __fib_res_prefsrc(struct fib_result *res)
677 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
/*
 * fib_dump_info() - serialize one route into a netlink message appended
 * to @skb: the rtmsg header, then RTA_DST / RTA_PRIORITY / RTA_FLOW /
 * metrics / RTA_PREFSRC attributes, and finally either the single
 * nexthop's RTA_GATEWAY + RTA_OIF or a full RTA_MULTIPATH array of
 * rtnexthop entries.  NLMSG_PUT / RTA_PUT jump to the failure path on
 * overflow, which trims skb back to @b (the tail snapshot).
 * NOTE(review): failure labels and the return statements are on lines
 * missing from this listing; fragment only.
 */
681 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
682 u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
686 struct nlmsghdr *nlh;
687 unsigned char *b = skb->tail;
689 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
690 rtm = NLMSG_DATA(nlh);
691 rtm->rtm_family = AF_INET;
692 rtm->rtm_dst_len = dst_len;
693 rtm->rtm_src_len = 0;
695 rtm->rtm_table = tb_id;
696 rtm->rtm_type = type;
697 rtm->rtm_flags = fi->fib_flags;
698 rtm->rtm_scope = scope;
699 if (rtm->rtm_dst_len)
700 RTA_PUT(skb, RTA_DST, 4, dst);
701 rtm->rtm_protocol = fi->fib_protocol;
702 if (fi->fib_priority)
703 RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
704 #ifdef CONFIG_NET_CLS_ROUTE
705 if (fi->fib_nh[0].nh_tclassid)
706 RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
708 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
711 RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
712 if (fi->fib_nhs == 1) {
713 if (fi->fib_nh->nh_gw)
714 RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
715 if (fi->fib_nh->nh_oif)
716 RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
718 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Multipath: emit one rtnexthop (plus nested RTA_GATEWAY) per nexthop. */
719 if (fi->fib_nhs > 1) {
720 struct rtnexthop *nhp;
721 struct rtattr *mp_head;
722 if (skb_tailroom(skb) <= RTA_SPACE(0))
724 mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
727 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
729 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
730 nhp->rtnh_flags = nh->nh_flags & 0xFF;
731 nhp->rtnh_hops = nh->nh_weight-1;
732 nhp->rtnh_ifindex = nh->nh_oif;
734 RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
735 nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
736 } endfor_nexthops(fi);
737 mp_head->rta_type = RTA_MULTIPATH;
738 mp_head->rta_len = skb->tail - (u8*)mp_head;
741 nlh->nlmsg_len = skb->tail - b;
/* Failure path: roll the skb back to where this message started. */
746 skb_trim(skb, b - skb->data);
750 #ifndef CONFIG_IP_NOSIOCRT
/*
 * fib_convert_rtentry() - translate a legacy SIOCADDRT/SIOCDELRT ioctl
 * struct rtentry into the netlink representation (nlmsghdr + rtmsg +
 * kern_rta): validates the address family and netmask, derives the
 * prefix length and route scope, resolves the device name (including
 * "dev:label" aliases, whose ifaddr supplies the preferred source), and
 * packs MTU/window/irtt into a freshly allocated RTA_METRICS blob.
 * Note that RTF_MTU is mapped to RTAX_ADVMSS with mtu - 40.
 * NOTE(review): heavily truncated fragment; error returns, the RTF_HOST
 * path, and the kmalloc NULL check are on missing lines.
 */
753 fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
754 struct kern_rta *rta, struct rtentry *r)
759 memset(rtm, 0, sizeof(*rtm));
760 memset(rta, 0, sizeof(*rta));
762 if (r->rt_dst.sa_family != AF_INET)
763 return -EAFNOSUPPORT;
765 /* Check mask for validity:
766 a) it must be contiguous.
767 b) destination must have all host bits clear.
768 c) if application forgot to set correct family (AF_INET),
769 reject request unless it is absolutely clear i.e.
770 both family and mask are zero.
773 ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
774 if (!(r->rt_flags&RTF_HOST)) {
775 u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
776 if (r->rt_genmask.sa_family != AF_INET) {
777 if (mask || r->rt_genmask.sa_family)
778 return -EAFNOSUPPORT;
780 if (bad_mask(mask, *ptr))
782 plen = inet_mask_len(mask);
785 nl->nlmsg_flags = NLM_F_REQUEST;
788 nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
789 if (cmd == SIOCDELRT) {
790 nl->nlmsg_type = RTM_DELROUTE;
793 nl->nlmsg_type = RTM_NEWROUTE;
794 nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
795 rtm->rtm_protocol = RTPROT_BOOT;
798 rtm->rtm_dst_len = plen;
/* Legacy metric is 1-based; stash the 0-based value in rt_pad3. */
802 *(u32*)&r->rt_pad3 = r->rt_metric - 1;
803 rta->rta_priority = (u32*)&r->rt_pad3;
805 if (r->rt_flags&RTF_REJECT) {
806 rtm->rtm_scope = RT_SCOPE_HOST;
807 rtm->rtm_type = RTN_UNREACHABLE;
810 rtm->rtm_scope = RT_SCOPE_NOWHERE;
811 rtm->rtm_type = RTN_UNICAST;
/* Resolve the user-supplied device name ("eth0" or alias "eth0:1"). */
815 struct net_device *dev;
816 char devname[IFNAMSIZ];
818 if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
820 devname[IFNAMSIZ-1] = 0;
821 colon = strchr(devname, ':');
824 dev = __dev_get_by_name(devname);
827 rta->rta_oif = &dev->ifindex;
829 struct in_ifaddr *ifa;
830 struct in_device *in_dev = __in_dev_get(dev);
834 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
835 if (strcmp(ifa->ifa_label, devname) == 0)
839 rta->rta_prefsrc = &ifa->ifa_local;
843 ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
844 if (r->rt_gateway.sa_family == AF_INET && *ptr) {
846 if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
847 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
850 if (cmd == SIOCDELRT)
853 if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
856 if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
857 rtm->rtm_scope = RT_SCOPE_LINK;
/* Pack legacy MTU/window/irtt values into an RTA_METRICS attribute. */
859 if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
861 struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
865 mx->rta_type = RTA_METRICS;
866 mx->rta_len = RTA_LENGTH(0);
867 if (r->rt_flags&RTF_MTU) {
868 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
869 rec->rta_type = RTAX_ADVMSS;
870 rec->rta_len = RTA_LENGTH(4);
871 mx->rta_len += RTA_LENGTH(4);
872 *(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
874 if (r->rt_flags&RTF_WINDOW) {
875 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
876 rec->rta_type = RTAX_WINDOW;
877 rec->rta_len = RTA_LENGTH(4);
878 mx->rta_len += RTA_LENGTH(4);
879 *(u32*)RTA_DATA(rec) = r->rt_window;
881 if (r->rt_flags&RTF_IRTT) {
882 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
883 rec->rta_type = RTAX_RTT;
884 rec->rta_len = RTA_LENGTH(4);
885 mx->rta_len += RTA_LENGTH(4);
886 *(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
896 - local address disappeared -> we must delete all the entries
898 - device went down -> we must shutdown all nexthops going via it.
/*
 * fib_sync_down() - event handler for a disappearing local address or a
 * device going down.  Entries whose preferred source was @local, and
 * nexthops routed via @dev, are flagged RTNH_F_DEAD; multipath power is
 * adjusted under fib_multipath_lock, and the whole fib_info is marked
 * dead once every nexthop is.  With @force > 1 nexthops on @dev are
 * killed regardless of scope.
 * NOTE(review): the dead-counter bookkeeping, scope setup for @force,
 * and the return value are on lines missing from this listing.
 */
901 int fib_sync_down(u32 local, struct net_device *dev, int force)
904 int scope = RT_SCOPE_NOWHERE;
910 if (local && fi->fib_prefsrc == local) {
911 fi->fib_flags |= RTNH_F_DEAD;
913 } else if (dev && fi->fib_nhs) {
916 change_nexthops(fi) {
917 if (nh->nh_flags&RTNH_F_DEAD)
919 else if (nh->nh_dev == dev &&
920 nh->nh_scope != scope) {
921 nh->nh_flags |= RTNH_F_DEAD;
922 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Remove this nexthop's share from the multipath power pool. */
923 spin_lock_bh(&fib_multipath_lock);
924 fi->fib_power -= nh->nh_power;
926 spin_unlock_bh(&fib_multipath_lock);
930 #ifdef CONFIG_IP_ROUTE_MULTIPATH
931 if (force > 1 && nh->nh_dev == dev) {
936 } endfor_nexthops(fi)
937 if (dead == fi->fib_nhs) {
938 fi->fib_flags |= RTNH_F_DEAD;
946 #ifdef CONFIG_IP_ROUTE_MULTIPATH
949 Dead device goes up. We wake up dead nexthops.
950 It makes sense only on multipath routes.
/*
 * fib_sync_up() - a dead device came back up: for every route, clear
 * RTNH_F_DEAD on nexthops using @dev (the device must be UP and have an
 * in_device) under fib_multipath_lock, and revive fib_infos that
 * regained a live nexthop.  Only compiled with multipath support.
 * NOTE(review): the alive counter and return are on lines missing from
 * this listing; fragment only.
 */
953 int fib_sync_up(struct net_device *dev)
957 if (!(dev->flags&IFF_UP))
963 change_nexthops(fi) {
964 if (!(nh->nh_flags&RTNH_F_DEAD)) {
968 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
970 if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
973 spin_lock_bh(&fib_multipath_lock);
975 nh->nh_flags &= ~RTNH_F_DEAD;
976 spin_unlock_bh(&fib_multipath_lock);
977 } endfor_nexthops(fi)
980 fi->fib_flags &= ~RTNH_F_DEAD;
988 The algorithm is suboptimal, but it provides really
989 fair weighted route distribution.
/*
 * fib_select_multipath() - weighted nexthop selection for a multipath
 * route.  When the route's power pool is exhausted (fib_power <= 0) it
 * is refilled from the live nexthops' weights; a pseudo-random token
 * w = jiffies % fib_power then walks the live nexthops, decrementing by
 * each nh_power until it goes non-positive, and that nexthop's index is
 * stored in res->nh_sel.  All bookkeeping is under fib_multipath_lock.
 * NOTE(review): the power decrement on selection and the dead-route
 * fallback are on lines missing from this listing; fragment only.
 */
992 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
994 struct fib_info *fi = res->fi;
997 spin_lock_bh(&fib_multipath_lock);
998 if (fi->fib_power <= 0) {
1000 change_nexthops(fi) {
1001 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1002 power += nh->nh_weight;
1003 nh->nh_power = nh->nh_weight;
1005 } endfor_nexthops(fi);
1006 fi->fib_power = power;
1008 spin_unlock_bh(&fib_multipath_lock);
1009 /* Race condition: route has just become dead. */
1016 /* w should be random number [0..fi->fib_power-1],
1017 it is pretty bad approximation.
1020 w = jiffies % fi->fib_power;
1022 change_nexthops(fi) {
1023 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1024 if ((w -= nh->nh_power) <= 0) {
1027 res->nh_sel = nhsel;
1028 spin_unlock_bh(&fib_multipath_lock);
1032 } endfor_nexthops(fi);
1034 /* Race condition: route has just become dead. */
1036 spin_unlock_bh(&fib_multipath_lock);