ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / net / ipv4 / fib_semantics.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              IPv4 Forwarding Information Base: semantics.
7  *
8  * Version:     $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9  *
10  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  */
17
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <asm/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/jiffies.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
38
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
45
46 #define FSprintk(a...)
47
48 static struct fib_info  *fib_info_list;
49 static rwlock_t fib_info_lock = RW_LOCK_UNLOCKED;
50 int fib_info_cnt;
51
52 #define for_fib_info() { struct fib_info *fi; \
53         for (fi = fib_info_list; fi; fi = fi->fib_next)
54
55 #define endfor_fib_info() }
56
/*
 * Nexthop iteration helpers.
 *
 * Each macro opens a block declaring "nhsel" (index of the current
 * nexthop) and "nh" (pointer to it); the loop body follows the macro and
 * the block is closed with endfor_nexthops().  for_nexthops() yields a
 * const pointer, change_nexthops() a writable one.
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

/* NOTE(review): no user of this lock is visible in this part of the file. */
static spinlock_t fib_multipath_lock = SPIN_LOCK_UNLOCKED;

#define for_nexthops(fi)						\
	{								\
		int nhsel;						\
		const struct fib_nh *nh;				\
		for (nhsel = 0, nh = (fi)->fib_nh;			\
		     nhsel < (fi)->fib_nhs;				\
		     nh++, nhsel++)

#define change_nexthops(fi)						\
	{								\
		int nhsel;						\
		struct fib_nh *nh;					\
		for (nhsel = 0, nh = (struct fib_nh *)((fi)->fib_nh);	\
		     nhsel < (fi)->fib_nhs;				\
		     nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath there is exactly one nexthop, so the loop runs once.
 * Hope, that gcc will optimize it to get rid of dummy loop.
 */

#define for_nexthops(fi)						\
	{								\
		int nhsel = 0;						\
		const struct fib_nh *nh = (fi)->fib_nh;			\
		for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi)						\
	{								\
		int nhsel = 0;						\
		struct fib_nh *nh = (struct fib_nh *)((fi)->fib_nh);	\
		for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

#define endfor_nexthops(fi) }
80
81
82 static struct 
83 {
84         int     error;
85         u8      scope;
86 } fib_props[RTA_MAX + 1] = {
87         {
88                 .error  = 0,
89                 .scope  = RT_SCOPE_NOWHERE,
90         },      /* RTN_UNSPEC */
91         {
92                 .error  = 0,
93                 .scope  = RT_SCOPE_UNIVERSE,
94         },      /* RTN_UNICAST */
95         {
96                 .error  = 0,
97                 .scope  = RT_SCOPE_HOST,
98         },      /* RTN_LOCAL */
99         {
100                 .error  = 0,
101                 .scope  = RT_SCOPE_LINK,
102         },      /* RTN_BROADCAST */
103         {
104                 .error  = 0,
105                 .scope  = RT_SCOPE_LINK,
106         },      /* RTN_ANYCAST */
107         {
108                 .error  = 0,
109                 .scope  = RT_SCOPE_UNIVERSE,
110         },      /* RTN_MULTICAST */
111         {
112                 .error  = -EINVAL,
113                 .scope  = RT_SCOPE_UNIVERSE,
114         },      /* RTN_BLACKHOLE */
115         {
116                 .error  = -EHOSTUNREACH,
117                 .scope  = RT_SCOPE_UNIVERSE,
118         },      /* RTN_UNREACHABLE */
119         {
120                 .error  = -EACCES,
121                 .scope  = RT_SCOPE_UNIVERSE,
122         },      /* RTN_PROHIBIT */
123         {
124                 .error  = -EAGAIN,
125                 .scope  = RT_SCOPE_UNIVERSE,
126         },      /* RTN_THROW */
127 #ifdef CONFIG_IP_ROUTE_NAT
128         {
129                 .error  = 0,
130                 .scope  = RT_SCOPE_HOST,
131         },      /* RTN_NAT */
132 #else
133         {
134                 .error  = -EINVAL,
135                 .scope  = RT_SCOPE_NOWHERE,
136         },      /* RTN_NAT */
137 #endif
138         {
139                 .error  = -EINVAL,
140                 .scope  = RT_SCOPE_NOWHERE,
141         },      /* RTN_XRESOLVE */
142 };
143
144
145 /* Release a nexthop info record */
146
147 void free_fib_info(struct fib_info *fi)
148 {
149         if (fi->fib_dead == 0) {
150                 printk("Freeing alive fib_info %p\n", fi);
151                 return;
152         }
153         change_nexthops(fi) {
154                 if (nh->nh_dev)
155                         dev_put(nh->nh_dev);
156                 nh->nh_dev = NULL;
157         } endfor_nexthops(fi);
158         fib_info_cnt--;
159         kfree(fi);
160 }
161
162 void fib_release_info(struct fib_info *fi)
163 {
164         write_lock(&fib_info_lock);
165         if (fi && --fi->fib_treeref == 0) {
166                 if (fi->fib_next)
167                         fi->fib_next->fib_prev = fi->fib_prev;
168                 if (fi->fib_prev)
169                         fi->fib_prev->fib_next = fi->fib_next;
170                 if (fi == fib_info_list)
171                         fib_info_list = fi->fib_next;
172                 fi->fib_dead = 1;
173                 fib_info_put(fi);
174         }
175         write_unlock(&fib_info_lock);
176 }
177
178 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
179 {
180         const struct fib_nh *onh = ofi->fib_nh;
181
182         for_nexthops(fi) {
183                 if (nh->nh_oif != onh->nh_oif ||
184                     nh->nh_gw  != onh->nh_gw ||
185                     nh->nh_scope != onh->nh_scope ||
186 #ifdef CONFIG_IP_ROUTE_MULTIPATH
187                     nh->nh_weight != onh->nh_weight ||
188 #endif
189 #ifdef CONFIG_NET_CLS_ROUTE
190                     nh->nh_tclassid != onh->nh_tclassid ||
191 #endif
192                     ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
193                         return -1;
194                 onh++;
195         } endfor_nexthops(fi);
196         return 0;
197 }
198
199 static __inline__ struct fib_info * fib_find_info(const struct fib_info *nfi)
200 {
201         for_fib_info() {
202                 if (fi->fib_nhs != nfi->fib_nhs)
203                         continue;
204                 if (nfi->fib_protocol == fi->fib_protocol &&
205                     nfi->fib_prefsrc == fi->fib_prefsrc &&
206                     nfi->fib_priority == fi->fib_priority &&
207                     memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 &&
208                     ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
209                     (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
210                         return fi;
211         } endfor_fib_info();
212         return NULL;
213 }
214
215 /* Check, that the gateway is already configured.
216    Used only by redirect accept routine.
217  */
218
219 int ip_fib_check_default(u32 gw, struct net_device *dev)
220 {
221         read_lock(&fib_info_lock);
222         for_fib_info() {
223                 if (fi->fib_flags & RTNH_F_DEAD)
224                         continue;
225                 for_nexthops(fi) {
226                         if (nh->nh_dev == dev && nh->nh_gw == gw &&
227                             nh->nh_scope == RT_SCOPE_LINK &&
228                             !(nh->nh_flags&RTNH_F_DEAD)) {
229                                 read_unlock(&fib_info_lock);
230                                 return 0;
231                         }
232                 } endfor_nexthops(fi);
233         } endfor_fib_info();
234         read_unlock(&fib_info_lock);
235         return -1;
236 }
237
238 #ifdef CONFIG_IP_ROUTE_MULTIPATH
239
240 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
241 {
242         while (RTA_OK(attr,attrlen)) {
243                 if (attr->rta_type == type)
244                         return *(u32*)RTA_DATA(attr);
245                 attr = RTA_NEXT(attr, attrlen);
246         }
247         return 0;
248 }
249
250 static int
251 fib_count_nexthops(struct rtattr *rta)
252 {
253         int nhs = 0;
254         struct rtnexthop *nhp = RTA_DATA(rta);
255         int nhlen = RTA_PAYLOAD(rta);
256
257         while (nhlen >= (int)sizeof(struct rtnexthop)) {
258                 if ((nhlen -= nhp->rtnh_len) < 0)
259                         return 0;
260                 nhs++;
261                 nhp = RTNH_NEXT(nhp);
262         };
263         return nhs;
264 }
265
266 static int
267 fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
268 {
269         struct rtnexthop *nhp = RTA_DATA(rta);
270         int nhlen = RTA_PAYLOAD(rta);
271
272         change_nexthops(fi) {
273                 int attrlen = nhlen - sizeof(struct rtnexthop);
274                 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
275                         return -EINVAL;
276                 nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
277                 nh->nh_oif = nhp->rtnh_ifindex;
278                 nh->nh_weight = nhp->rtnh_hops + 1;
279                 if (attrlen) {
280                         nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
281 #ifdef CONFIG_NET_CLS_ROUTE
282                         nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
283 #endif
284                 }
285                 nhp = RTNH_NEXT(nhp);
286         } endfor_nexthops(fi);
287         return 0;
288 }
289
290 #endif
291
292 int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
293                  struct fib_info *fi)
294 {
295 #ifdef CONFIG_IP_ROUTE_MULTIPATH
296         struct rtnexthop *nhp;
297         int nhlen;
298 #endif
299
300         if (rta->rta_priority &&
301             *rta->rta_priority != fi->fib_priority)
302                 return 1;
303
304         if (rta->rta_oif || rta->rta_gw) {
305                 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
306                     (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
307                         return 0;
308                 return 1;
309         }
310
311 #ifdef CONFIG_IP_ROUTE_MULTIPATH
312         if (rta->rta_mp == NULL)
313                 return 0;
314         nhp = RTA_DATA(rta->rta_mp);
315         nhlen = RTA_PAYLOAD(rta->rta_mp);
316         
317         for_nexthops(fi) {
318                 int attrlen = nhlen - sizeof(struct rtnexthop);
319                 u32 gw;
320
321                 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
322                         return -EINVAL;
323                 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
324                         return 1;
325                 if (attrlen) {
326                         gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
327                         if (gw && gw != nh->nh_gw)
328                                 return 1;
329 #ifdef CONFIG_NET_CLS_ROUTE
330                         gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
331                         if (gw && gw != nh->nh_tclassid)
332                                 return 1;
333 #endif
334                 }
335                 nhp = RTNH_NEXT(nhp);
336         } endfor_nexthops(fi);
337 #endif
338         return 0;
339 }
340
341
342 /*
343    Picture
344    -------
345
346    Semantics of nexthop is very messy by historical reasons.
347    We have to take into account, that:
348    a) gateway can be actually local interface address,
349       so that gatewayed route is direct.
350    b) gateway must be on-link address, possibly
351       described not by an ifaddr, but also by a direct route.
352    c) If both gateway and interface are specified, they should not
353       contradict.
354    d) If we use tunnel routes, gateway could be not on-link.
355
356    Attempt to reconcile all of these (alas, self-contradictory) conditions
357    results in pretty ugly and hairy code with obscure logic.
358
359    I chose to generalized it instead, so that the size
360    of code does not increase practically, but it becomes
361    much more general.
362    Every prefix is assigned a "scope" value: "host" is local address,
363    "link" is direct route,
364    [ ... "site" ... "interior" ... ]
365    and "universe" is true gateway route with global meaning.
366
367    Every prefix refers to a set of "nexthop"s (gw, oif),
368    where gw must have narrower scope. This recursion stops
369    when gw has LOCAL scope or if "nexthop" is declared ONLINK,
370    which means that gw is forced to be on link.
371
372    Code is still hairy, but now it is apparently logically
373    consistent and very flexible. F.e. as by-product it allows
374    to co-exists in peace independent exterior and interior
375    routing processes.
376
377    Normally it looks as following.
378
379    {universe prefix}  -> (gw, oif) [scope link]
380                           |
381                           |-> {link prefix} -> (gw, oif) [scope local]
382                                                 |
383                                                 |-> {local prefix} (terminal node)
384  */
385
386 static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
387 {
388         int err;
389
390         if (nh->nh_gw) {
391                 struct fib_result res;
392
393 #ifdef CONFIG_IP_ROUTE_PERVASIVE
394                 if (nh->nh_flags&RTNH_F_PERVASIVE)
395                         return 0;
396 #endif
397                 if (nh->nh_flags&RTNH_F_ONLINK) {
398                         struct net_device *dev;
399
400                         if (r->rtm_scope >= RT_SCOPE_LINK)
401                                 return -EINVAL;
402                         if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
403                                 return -EINVAL;
404                         if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
405                                 return -ENODEV;
406                         if (!(dev->flags&IFF_UP))
407                                 return -ENETDOWN;
408                         nh->nh_dev = dev;
409                         dev_hold(dev);
410                         nh->nh_scope = RT_SCOPE_LINK;
411                         return 0;
412                 }
413                 {
414                         struct flowi fl = { .nl_u = { .ip4_u =
415                                                       { .daddr = nh->nh_gw,
416                                                         .scope = r->rtm_scope + 1 } },
417                                             .oif = nh->nh_oif };
418
419                         /* It is not necessary, but requires a bit of thinking */
420                         if (fl.fl4_scope < RT_SCOPE_LINK)
421                                 fl.fl4_scope = RT_SCOPE_LINK;
422                         if ((err = fib_lookup(&fl, &res)) != 0)
423                                 return err;
424                 }
425                 err = -EINVAL;
426                 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
427                         goto out;
428                 nh->nh_scope = res.scope;
429                 nh->nh_oif = FIB_RES_OIF(res);
430                 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
431                         goto out;
432                 dev_hold(nh->nh_dev);
433                 err = -ENETDOWN;
434                 if (!(nh->nh_dev->flags & IFF_UP))
435                         goto out;
436                 err = 0;
437 out:
438                 fib_res_put(&res);
439                 return err;
440         } else {
441                 struct in_device *in_dev;
442
443                 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
444                         return -EINVAL;
445
446                 in_dev = inetdev_by_index(nh->nh_oif);
447                 if (in_dev == NULL)
448                         return -ENODEV;
449                 if (!(in_dev->dev->flags&IFF_UP)) {
450                         in_dev_put(in_dev);
451                         return -ENETDOWN;
452                 }
453                 nh->nh_dev = in_dev->dev;
454                 dev_hold(nh->nh_dev);
455                 nh->nh_scope = RT_SCOPE_HOST;
456                 in_dev_put(in_dev);
457         }
458         return 0;
459 }
460
461 struct fib_info *
462 fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
463                 const struct nlmsghdr *nlh, int *errp)
464 {
465         int err;
466         struct fib_info *fi = NULL;
467         struct fib_info *ofi;
468 #ifdef CONFIG_IP_ROUTE_MULTIPATH
469         int nhs = 1;
470 #else
471         const int nhs = 1;
472 #endif
473
474         /* Fast check to catch the most weird cases */
475         if (fib_props[r->rtm_type].scope > r->rtm_scope)
476                 goto err_inval;
477
478 #ifdef CONFIG_IP_ROUTE_MULTIPATH
479         if (rta->rta_mp) {
480                 nhs = fib_count_nexthops(rta->rta_mp);
481                 if (nhs == 0)
482                         goto err_inval;
483         }
484 #endif
485
486         fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
487         err = -ENOBUFS;
488         if (fi == NULL)
489                 goto failure;
490         fib_info_cnt++;
491         memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
492
493         fi->fib_protocol = r->rtm_protocol;
494         fi->fib_nhs = nhs;
495         fi->fib_flags = r->rtm_flags;
496         if (rta->rta_priority)
497                 fi->fib_priority = *rta->rta_priority;
498         if (rta->rta_mx) {
499                 int attrlen = RTA_PAYLOAD(rta->rta_mx);
500                 struct rtattr *attr = RTA_DATA(rta->rta_mx);
501
502                 while (RTA_OK(attr, attrlen)) {
503                         unsigned flavor = attr->rta_type;
504                         if (flavor) {
505                                 if (flavor > RTAX_MAX)
506                                         goto err_inval;
507                                 fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
508                         }
509                         attr = RTA_NEXT(attr, attrlen);
510                 }
511         }
512         if (rta->rta_prefsrc)
513                 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
514
515         if (rta->rta_mp) {
516 #ifdef CONFIG_IP_ROUTE_MULTIPATH
517                 if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
518                         goto failure;
519                 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
520                         goto err_inval;
521                 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
522                         goto err_inval;
523 #ifdef CONFIG_NET_CLS_ROUTE
524                 if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
525                         goto err_inval;
526 #endif
527 #else
528                 goto err_inval;
529 #endif
530         } else {
531                 struct fib_nh *nh = fi->fib_nh;
532                 if (rta->rta_oif)
533                         nh->nh_oif = *rta->rta_oif;
534                 if (rta->rta_gw)
535                         memcpy(&nh->nh_gw, rta->rta_gw, 4);
536 #ifdef CONFIG_NET_CLS_ROUTE
537                 if (rta->rta_flow)
538                         memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
539 #endif
540                 nh->nh_flags = r->rtm_flags;
541 #ifdef CONFIG_IP_ROUTE_MULTIPATH
542                 nh->nh_weight = 1;
543 #endif
544         }
545
546 #ifdef CONFIG_IP_ROUTE_NAT
547         if (r->rtm_type == RTN_NAT) {
548                 if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif)
549                         goto err_inval;
550                 memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 4);
551                 goto link_it;
552         }
553 #endif
554
555         if (fib_props[r->rtm_type].error) {
556                 if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
557                         goto err_inval;
558                 goto link_it;
559         }
560
561         if (r->rtm_scope > RT_SCOPE_HOST)
562                 goto err_inval;
563
564         if (r->rtm_scope == RT_SCOPE_HOST) {
565                 struct fib_nh *nh = fi->fib_nh;
566
567                 /* Local address is added. */
568                 if (nhs != 1 || nh->nh_gw)
569                         goto err_inval;
570                 nh->nh_scope = RT_SCOPE_NOWHERE;
571                 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
572                 err = -ENODEV;
573                 if (nh->nh_dev == NULL)
574                         goto failure;
575         } else {
576                 change_nexthops(fi) {
577                         if ((err = fib_check_nh(r, fi, nh)) != 0)
578                                 goto failure;
579                 } endfor_nexthops(fi)
580         }
581
582         if (fi->fib_prefsrc) {
583                 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
584                     memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
585                         if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
586                                 goto err_inval;
587         }
588
589 link_it:
590         if ((ofi = fib_find_info(fi)) != NULL) {
591                 fi->fib_dead = 1;
592                 free_fib_info(fi);
593                 ofi->fib_treeref++;
594                 return ofi;
595         }
596
597         fi->fib_treeref++;
598         atomic_inc(&fi->fib_clntref);
599         write_lock(&fib_info_lock);
600         fi->fib_next = fib_info_list;
601         fi->fib_prev = NULL;
602         if (fib_info_list)
603                 fib_info_list->fib_prev = fi;
604         fib_info_list = fi;
605         write_unlock(&fib_info_lock);
606         return fi;
607
608 err_inval:
609         err = -EINVAL;
610
611 failure:
612         *errp = err;
613         if (fi) {
614                 fi->fib_dead = 1;
615                 free_fib_info(fi);
616         }
617         return NULL;
618 }
619
620 int 
621 fib_semantic_match(int type, struct fib_info *fi, const struct flowi *flp, struct fib_result *res)
622 {
623         int err = fib_props[type].error;
624
625         if (err == 0) {
626                 if (fi->fib_flags&RTNH_F_DEAD)
627                         return 1;
628
629                 res->fi = fi;
630
631                 switch (type) {
632 #ifdef CONFIG_IP_ROUTE_NAT
633                 case RTN_NAT:
634                         FIB_RES_RESET(*res);
635                         atomic_inc(&fi->fib_clntref);
636                         return 0;
637 #endif
638                 case RTN_UNICAST:
639                 case RTN_LOCAL:
640                 case RTN_BROADCAST:
641                 case RTN_ANYCAST:
642                 case RTN_MULTICAST:
643                         for_nexthops(fi) {
644                                 if (nh->nh_flags&RTNH_F_DEAD)
645                                         continue;
646                                 if (!flp->oif || flp->oif == nh->nh_oif)
647                                         break;
648                         }
649 #ifdef CONFIG_IP_ROUTE_MULTIPATH
650                         if (nhsel < fi->fib_nhs) {
651                                 res->nh_sel = nhsel;
652                                 atomic_inc(&fi->fib_clntref);
653                                 return 0;
654                         }
655 #else
656                         if (nhsel < 1) {
657                                 atomic_inc(&fi->fib_clntref);
658                                 return 0;
659                         }
660 #endif
661                         endfor_nexthops(fi);
662                         res->fi = NULL;
663                         return 1;
664                 default:
665                         res->fi = NULL;
666                         printk(KERN_DEBUG "impossible 102\n");
667                         return -EINVAL;
668                 }
669         }
670         return err;
671 }
672
673 /* Find appropriate source address to this destination */
674
675 u32 __fib_res_prefsrc(struct fib_result *res)
676 {
677         return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
678 }
679
680 int
681 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
682               u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
683               struct fib_info *fi)
684 {
685         struct rtmsg *rtm;
686         struct nlmsghdr  *nlh;
687         unsigned char    *b = skb->tail;
688
689         nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
690         rtm = NLMSG_DATA(nlh);
691         rtm->rtm_family = AF_INET;
692         rtm->rtm_dst_len = dst_len;
693         rtm->rtm_src_len = 0;
694         rtm->rtm_tos = tos;
695         rtm->rtm_table = tb_id;
696         rtm->rtm_type = type;
697         rtm->rtm_flags = fi->fib_flags;
698         rtm->rtm_scope = scope;
699         if (rtm->rtm_dst_len)
700                 RTA_PUT(skb, RTA_DST, 4, dst);
701         rtm->rtm_protocol = fi->fib_protocol;
702         if (fi->fib_priority)
703                 RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
704 #ifdef CONFIG_NET_CLS_ROUTE
705         if (fi->fib_nh[0].nh_tclassid)
706                 RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
707 #endif
708         if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
709                 goto rtattr_failure;
710         if (fi->fib_prefsrc)
711                 RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
712         if (fi->fib_nhs == 1) {
713                 if (fi->fib_nh->nh_gw)
714                         RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
715                 if (fi->fib_nh->nh_oif)
716                         RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
717         }
718 #ifdef CONFIG_IP_ROUTE_MULTIPATH
719         if (fi->fib_nhs > 1) {
720                 struct rtnexthop *nhp;
721                 struct rtattr *mp_head;
722                 if (skb_tailroom(skb) <= RTA_SPACE(0))
723                         goto rtattr_failure;
724                 mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
725
726                 for_nexthops(fi) {
727                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
728                                 goto rtattr_failure;
729                         nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
730                         nhp->rtnh_flags = nh->nh_flags & 0xFF;
731                         nhp->rtnh_hops = nh->nh_weight-1;
732                         nhp->rtnh_ifindex = nh->nh_oif;
733                         if (nh->nh_gw)
734                                 RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
735                         nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
736                 } endfor_nexthops(fi);
737                 mp_head->rta_type = RTA_MULTIPATH;
738                 mp_head->rta_len = skb->tail - (u8*)mp_head;
739         }
740 #endif
741         nlh->nlmsg_len = skb->tail - b;
742         return skb->len;
743
744 nlmsg_failure:
745 rtattr_failure:
746         skb_trim(skb, b - skb->data);
747         return -1;
748 }
749
750 #ifndef CONFIG_IP_NOSIOCRT
751
752 int
753 fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
754                     struct kern_rta *rta, struct rtentry *r)
755 {
756         int    plen;
757         u32    *ptr;
758
759         memset(rtm, 0, sizeof(*rtm));
760         memset(rta, 0, sizeof(*rta));
761
762         if (r->rt_dst.sa_family != AF_INET)
763                 return -EAFNOSUPPORT;
764
765         /* Check mask for validity:
766            a) it must be contiguous.
767            b) destination must have all host bits clear.
768            c) if application forgot to set correct family (AF_INET),
769               reject request unless it is absolutely clear i.e.
770               both family and mask are zero.
771          */
772         plen = 32;
773         ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
774         if (!(r->rt_flags&RTF_HOST)) {
775                 u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
776                 if (r->rt_genmask.sa_family != AF_INET) {
777                         if (mask || r->rt_genmask.sa_family)
778                                 return -EAFNOSUPPORT;
779                 }
780                 if (bad_mask(mask, *ptr))
781                         return -EINVAL;
782                 plen = inet_mask_len(mask);
783         }
784
785         nl->nlmsg_flags = NLM_F_REQUEST;
786         nl->nlmsg_pid = 0;
787         nl->nlmsg_seq = 0;
788         nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
789         if (cmd == SIOCDELRT) {
790                 nl->nlmsg_type = RTM_DELROUTE;
791                 nl->nlmsg_flags = 0;
792         } else {
793                 nl->nlmsg_type = RTM_NEWROUTE;
794                 nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
795                 rtm->rtm_protocol = RTPROT_BOOT;
796         }
797
798         rtm->rtm_dst_len = plen;
799         rta->rta_dst = ptr;
800
801         if (r->rt_metric) {
802                 *(u32*)&r->rt_pad3 = r->rt_metric - 1;
803                 rta->rta_priority = (u32*)&r->rt_pad3;
804         }
805         if (r->rt_flags&RTF_REJECT) {
806                 rtm->rtm_scope = RT_SCOPE_HOST;
807                 rtm->rtm_type = RTN_UNREACHABLE;
808                 return 0;
809         }
810         rtm->rtm_scope = RT_SCOPE_NOWHERE;
811         rtm->rtm_type = RTN_UNICAST;
812
813         if (r->rt_dev) {
814                 char *colon;
815                 struct net_device *dev;
816                 char   devname[IFNAMSIZ];
817
818                 if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
819                         return -EFAULT;
820                 devname[IFNAMSIZ-1] = 0;
821                 colon = strchr(devname, ':');
822                 if (colon)
823                         *colon = 0;
824                 dev = __dev_get_by_name(devname);
825                 if (!dev)
826                         return -ENODEV;
827                 rta->rta_oif = &dev->ifindex;
828                 if (colon) {
829                         struct in_ifaddr *ifa;
830                         struct in_device *in_dev = __in_dev_get(dev);
831                         if (!in_dev)
832                                 return -ENODEV;
833                         *colon = ':';
834                         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
835                                 if (strcmp(ifa->ifa_label, devname) == 0)
836                                         break;
837                         if (ifa == NULL)
838                                 return -ENODEV;
839                         rta->rta_prefsrc = &ifa->ifa_local;
840                 }
841         }
842
843         ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
844         if (r->rt_gateway.sa_family == AF_INET && *ptr) {
845                 rta->rta_gw = ptr;
846                 if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
847                         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
848         }
849
850         if (cmd == SIOCDELRT)
851                 return 0;
852
853         if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
854                 return -EINVAL;
855
856         if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
857                 rtm->rtm_scope = RT_SCOPE_LINK;
858
859         if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
860                 struct rtattr *rec;
861                 struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
862                 if (mx == NULL)
863                         return -ENOMEM;
864                 rta->rta_mx = mx;
865                 mx->rta_type = RTA_METRICS;
866                 mx->rta_len  = RTA_LENGTH(0);
867                 if (r->rt_flags&RTF_MTU) {
868                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
869                         rec->rta_type = RTAX_ADVMSS;
870                         rec->rta_len = RTA_LENGTH(4);
871                         mx->rta_len += RTA_LENGTH(4);
872                         *(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
873                 }
874                 if (r->rt_flags&RTF_WINDOW) {
875                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
876                         rec->rta_type = RTAX_WINDOW;
877                         rec->rta_len = RTA_LENGTH(4);
878                         mx->rta_len += RTA_LENGTH(4);
879                         *(u32*)RTA_DATA(rec) = r->rt_window;
880                 }
881                 if (r->rt_flags&RTF_IRTT) {
882                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
883                         rec->rta_type = RTAX_RTT;
884                         rec->rta_len = RTA_LENGTH(4);
885                         mx->rta_len += RTA_LENGTH(4);
886                         *(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
887                 }
888         }
889         return 0;
890 }
891
892 #endif
893
894 /*
895    Update FIB if:
896    - local address disappeared -> we must delete all the entries
897      referring to it.
898    - device went down -> we must shutdown all nexthops going via it.
899  */
900
901 int fib_sync_down(u32 local, struct net_device *dev, int force)
902 {
903         int ret = 0;
904         int scope = RT_SCOPE_NOWHERE;
905         
906         if (force)
907                 scope = -1;
908
909         for_fib_info() {
910                 if (local && fi->fib_prefsrc == local) {
911                         fi->fib_flags |= RTNH_F_DEAD;
912                         ret++;
913                 } else if (dev && fi->fib_nhs) {
914                         int dead = 0;
915
916                         change_nexthops(fi) {
917                                 if (nh->nh_flags&RTNH_F_DEAD)
918                                         dead++;
919                                 else if (nh->nh_dev == dev &&
920                                          nh->nh_scope != scope) {
921                                         nh->nh_flags |= RTNH_F_DEAD;
922 #ifdef CONFIG_IP_ROUTE_MULTIPATH
923                                         spin_lock_bh(&fib_multipath_lock);
924                                         fi->fib_power -= nh->nh_power;
925                                         nh->nh_power = 0;
926                                         spin_unlock_bh(&fib_multipath_lock);
927 #endif
928                                         dead++;
929                                 }
930 #ifdef CONFIG_IP_ROUTE_MULTIPATH
931                                 if (force > 1 && nh->nh_dev == dev) {
932                                         dead = fi->fib_nhs;
933                                         break;
934                                 }
935 #endif
936                         } endfor_nexthops(fi)
937                         if (dead == fi->fib_nhs) {
938                                 fi->fib_flags |= RTNH_F_DEAD;
939                                 ret++;
940                         }
941                 }
942         } endfor_fib_info();
943         return ret;
944 }
945
946 #ifdef CONFIG_IP_ROUTE_MULTIPATH
947
948 /*
949    Dead device goes up. We wake up dead nexthops.
950    It takes sense only on multipath routes.
951  */
952
953 int fib_sync_up(struct net_device *dev)
954 {
955         int ret = 0;
956
957         if (!(dev->flags&IFF_UP))
958                 return 0;
959
960         for_fib_info() {
961                 int alive = 0;
962
963                 change_nexthops(fi) {
964                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
965                                 alive++;
966                                 continue;
967                         }
968                         if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
969                                 continue;
970                         if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
971                                 continue;
972                         alive++;
973                         spin_lock_bh(&fib_multipath_lock);
974                         nh->nh_power = 0;
975                         nh->nh_flags &= ~RTNH_F_DEAD;
976                         spin_unlock_bh(&fib_multipath_lock);
977                 } endfor_nexthops(fi)
978
979                 if (alive > 0) {
980                         fi->fib_flags &= ~RTNH_F_DEAD;
981                         ret++;
982                 }
983         } endfor_fib_info();
984         return ret;
985 }
986
987 /*
988    The algorithm is suboptimal, but it provides really
989    fair weighted route distribution.
990  */
991
992 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
993 {
994         struct fib_info *fi = res->fi;
995         int w;
996
997         spin_lock_bh(&fib_multipath_lock);
998         if (fi->fib_power <= 0) {
999                 int power = 0;
1000                 change_nexthops(fi) {
1001                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1002                                 power += nh->nh_weight;
1003                                 nh->nh_power = nh->nh_weight;
1004                         }
1005                 } endfor_nexthops(fi);
1006                 fi->fib_power = power;
1007                 if (power <= 0) {
1008                         spin_unlock_bh(&fib_multipath_lock);
1009                         /* Race condition: route has just become dead. */
1010                         res->nh_sel = 0;
1011                         return;
1012                 }
1013         }
1014
1015
1016         /* w should be random number [0..fi->fib_power-1],
1017            it is pretty bad approximation.
1018          */
1019
1020         w = jiffies % fi->fib_power;
1021
1022         change_nexthops(fi) {
1023                 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1024                         if ((w -= nh->nh_power) <= 0) {
1025                                 nh->nh_power--;
1026                                 fi->fib_power--;
1027                                 res->nh_sel = nhsel;
1028                                 spin_unlock_bh(&fib_multipath_lock);
1029                                 return;
1030                         }
1031                 }
1032         } endfor_nexthops(fi);
1033
1034         /* Race condition: route has just become dead. */
1035         res->nh_sel = 0;
1036         spin_unlock_bh(&fib_multipath_lock);
1037 }
1038 #endif