Merge to Fedora kernel-2.6.17-1.2187_FC5 patched with stable patch-2.6.17.13-vs2...
[linux-2.6.git] / net / ipv4 / fib_semantics.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              IPv4 Forwarding Information Base: semantics.
7  *
8  * Version:     $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9  *
10  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  */
17
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/jiffies.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/inetdevice.h>
33 #include <linux/netdevice.h>
34 #include <linux/if_arp.h>
35 #include <linux/proc_fs.h>
36 #include <linux/skbuff.h>
37 #include <linux/netlink.h>
38 #include <linux/init.h>
39
40 #include <net/arp.h>
41 #include <net/ip.h>
42 #include <net/protocol.h>
43 #include <net/route.h>
44 #include <net/tcp.h>
45 #include <net/sock.h>
46 #include <net/ip_fib.h>
47 #include <net/ip_mp_alg.h>
48
49 #include "fib_lookup.h"
50
/* Debug print hook; expands to nothing, so FSprintk() calls compile away. */
51 #define FSprintk(a...)
52
/*
 * fib_info_lock is taken (write, BH-disabled) around every insertion into
 * and removal from the three hash tables below, and (read) by
 * ip_fib_check_default() while it walks fib_info_devhash.
 */
53 static DEFINE_RWLOCK(fib_info_lock);
/* fib_info records, bucket chosen by fib_info_hashfn(). */
54 static struct hlist_head *fib_info_hash;
/* fib_info records that have a preferred source, keyed by fib_laddr_hashfn(fib_prefsrc). */
55 static struct hlist_head *fib_info_laddrhash;
/*
 * Number of buckets in each of the two tables above.  The hash functions
 * mask with (fib_hash_size - 1); the size starts at 0 and is doubled by
 * fib_create_info() when fib_info_cnt reaches it.
 */
56 static unsigned int fib_hash_size;
/* Count of allocated fib_info records: ++ in fib_create_info(), -- in free_fib_info(). */
57 static unsigned int fib_info_cnt;
58
/* Fixed-size table of nexthops (struct fib_nh, via nh_hash) keyed by device ifindex. */
59 #define DEVINDEX_HASHBITS 8
60 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
61 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
62
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) OPEN a brace, declare the loop
 * variables `nhsel` (index) and `nh` (current nexthop; const for
 * for_nexthops, writable for change_nexthops) and start a for loop.  The
 * caller supplies the loop body and must close the scope with
 * endfor_nexthops(fi), which expands to the matching '}'.
 */
63 #ifdef CONFIG_IP_ROUTE_MULTIPATH
64
/* NOTE(review): not referenced in the part of the file visible here. */
65 static DEFINE_SPINLOCK(fib_multipath_lock);
66
/* Multipath build: walk all fi->fib_nhs entries of fi->fib_nh[]. */
67 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
68 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
69
70 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
71 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
72
73 #else /* CONFIG_IP_ROUTE_MULTIPATH */
74
/*
 * Non-multipath build: the loop runs exactly once over fi->fib_nh[0];
 * nh is not advanced.
 */
75 /* Hope, that gcc will optimize it to get rid of dummy loop */
76
77 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
78 for (nhsel=0; nhsel < 1; nhsel++)
79
80 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
81 for (nhsel=0; nhsel < 1; nhsel++)
82
83 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
84
/* Closes the scope opened by for_nexthops()/change_nexthops(). */
85 #define endfor_nexthops(fi) }
86
87
88 static const struct 
89 {
90         int     error;
91         u8      scope;
92 } fib_props[RTA_MAX + 1] = {
93         {
94                 .error  = 0,
95                 .scope  = RT_SCOPE_NOWHERE,
96         },      /* RTN_UNSPEC */
97         {
98                 .error  = 0,
99                 .scope  = RT_SCOPE_UNIVERSE,
100         },      /* RTN_UNICAST */
101         {
102                 .error  = 0,
103                 .scope  = RT_SCOPE_HOST,
104         },      /* RTN_LOCAL */
105         {
106                 .error  = 0,
107                 .scope  = RT_SCOPE_LINK,
108         },      /* RTN_BROADCAST */
109         {
110                 .error  = 0,
111                 .scope  = RT_SCOPE_LINK,
112         },      /* RTN_ANYCAST */
113         {
114                 .error  = 0,
115                 .scope  = RT_SCOPE_UNIVERSE,
116         },      /* RTN_MULTICAST */
117         {
118                 .error  = -EINVAL,
119                 .scope  = RT_SCOPE_UNIVERSE,
120         },      /* RTN_BLACKHOLE */
121         {
122                 .error  = -EHOSTUNREACH,
123                 .scope  = RT_SCOPE_UNIVERSE,
124         },      /* RTN_UNREACHABLE */
125         {
126                 .error  = -EACCES,
127                 .scope  = RT_SCOPE_UNIVERSE,
128         },      /* RTN_PROHIBIT */
129         {
130                 .error  = -EAGAIN,
131                 .scope  = RT_SCOPE_UNIVERSE,
132         },      /* RTN_THROW */
133         {
134                 .error  = -EINVAL,
135                 .scope  = RT_SCOPE_NOWHERE,
136         },      /* RTN_NAT */
137         {
138                 .error  = -EINVAL,
139                 .scope  = RT_SCOPE_NOWHERE,
140         },      /* RTN_XRESOLVE */
141 };
142
143
144 /* Release a nexthop info record */
145
146 void free_fib_info(struct fib_info *fi)
147 {
148         if (fi->fib_dead == 0) {
149                 printk("Freeing alive fib_info %p\n", fi);
150                 return;
151         }
152         change_nexthops(fi) {
153                 if (nh->nh_dev)
154                         dev_put(nh->nh_dev);
155                 nh->nh_dev = NULL;
156         } endfor_nexthops(fi);
157         fib_info_cnt--;
158         kfree(fi);
159 }
160
161 void fib_release_info(struct fib_info *fi)
162 {
163         write_lock_bh(&fib_info_lock);
164         if (fi && --fi->fib_treeref == 0) {
165                 hlist_del(&fi->fib_hash);
166                 if (fi->fib_prefsrc)
167                         hlist_del(&fi->fib_lhash);
168                 change_nexthops(fi) {
169                         if (!nh->nh_dev)
170                                 continue;
171                         hlist_del(&nh->nh_hash);
172                 } endfor_nexthops(fi)
173                 fi->fib_dead = 1;
174                 fib_info_put(fi);
175         }
176         write_unlock_bh(&fib_info_lock);
177 }
178
179 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
180 {
181         const struct fib_nh *onh = ofi->fib_nh;
182
183         for_nexthops(fi) {
184                 if (nh->nh_oif != onh->nh_oif ||
185                     nh->nh_gw  != onh->nh_gw ||
186                     nh->nh_scope != onh->nh_scope ||
187 #ifdef CONFIG_IP_ROUTE_MULTIPATH
188                     nh->nh_weight != onh->nh_weight ||
189 #endif
190 #ifdef CONFIG_NET_CLS_ROUTE
191                     nh->nh_tclassid != onh->nh_tclassid ||
192 #endif
193                     ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
194                         return -1;
195                 onh++;
196         } endfor_nexthops(fi);
197         return 0;
198 }
199
200 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
201 {
202         unsigned int mask = (fib_hash_size - 1);
203         unsigned int val = fi->fib_nhs;
204
205         val ^= fi->fib_protocol;
206         val ^= fi->fib_prefsrc;
207         val ^= fi->fib_priority;
208
209         return (val ^ (val >> 7) ^ (val >> 12)) & mask;
210 }
211
212 static struct fib_info *fib_find_info(const struct fib_info *nfi)
213 {
214         struct hlist_head *head;
215         struct hlist_node *node;
216         struct fib_info *fi;
217         unsigned int hash;
218
219         hash = fib_info_hashfn(nfi);
220         head = &fib_info_hash[hash];
221
222         hlist_for_each_entry(fi, node, head, fib_hash) {
223                 if (fi->fib_nhs != nfi->fib_nhs)
224                         continue;
225                 if (nfi->fib_protocol == fi->fib_protocol &&
226                     nfi->fib_prefsrc == fi->fib_prefsrc &&
227                     nfi->fib_priority == fi->fib_priority &&
228                     memcmp(nfi->fib_metrics, fi->fib_metrics,
229                            sizeof(fi->fib_metrics)) == 0 &&
230                     ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
231                     (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
232                         return fi;
233         }
234
235         return NULL;
236 }
237
238 static inline unsigned int fib_devindex_hashfn(unsigned int val)
239 {
240         unsigned int mask = DEVINDEX_HASHSIZE - 1;
241
242         return (val ^
243                 (val >> DEVINDEX_HASHBITS) ^
244                 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
245 }
246
247 /* Check, that the gateway is already configured.
248    Used only by redirect accept routine.
249  */
250
251 int ip_fib_check_default(u32 gw, struct net_device *dev)
252 {
253         struct hlist_head *head;
254         struct hlist_node *node;
255         struct fib_nh *nh;
256         unsigned int hash;
257
258         read_lock(&fib_info_lock);
259
260         hash = fib_devindex_hashfn(dev->ifindex);
261         head = &fib_info_devhash[hash];
262         hlist_for_each_entry(nh, node, head, nh_hash) {
263                 if (nh->nh_dev == dev &&
264                     nh->nh_gw == gw &&
265                     !(nh->nh_flags&RTNH_F_DEAD)) {
266                         read_unlock(&fib_info_lock);
267                         return 0;
268                 }
269         }
270
271         read_unlock(&fib_info_lock);
272
273         return -1;
274 }
275
276 void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
277                int z, int tb_id,
278                struct nlmsghdr *n, struct netlink_skb_parms *req)
279 {
280         struct sk_buff *skb;
281         u32 pid = req ? req->pid : n->nlmsg_pid;
282         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
283
284         skb = alloc_skb(size, GFP_KERNEL);
285         if (!skb)
286                 return;
287
288         if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
289                           fa->fa_type, fa->fa_scope, &key, z,
290                           fa->fa_tos,
291                           fa->fa_info, 0) < 0) {
292                 kfree_skb(skb);
293                 return;
294         }
295         NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE;
296         if (n->nlmsg_flags&NLM_F_ECHO)
297                 atomic_inc(&skb->users);
298         netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL);
299         if (n->nlmsg_flags&NLM_F_ECHO)
300                 netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
301 }
302
303 /* Return the first fib alias matching TOS with
304  * priority less than or equal to PRIO.
305  */
306 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
307 {
308         if (fah) {
309                 struct fib_alias *fa;
310                 list_for_each_entry(fa, fah, fa_list) {
311                         if (fa->fa_tos > tos)
312                                 continue;
313                         if (fa->fa_info->fib_priority >= prio ||
314                             fa->fa_tos < tos)
315                                 return fa;
316                 }
317         }
318         return NULL;
319 }
320
321 int fib_detect_death(struct fib_info *fi, int order,
322                      struct fib_info **last_resort, int *last_idx, int *dflt)
323 {
324         struct neighbour *n;
325         int state = NUD_NONE;
326
327         n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
328         if (n) {
329                 state = n->nud_state;
330                 neigh_release(n);
331         }
332         if (state==NUD_REACHABLE)
333                 return 0;
334         if ((state&NUD_VALID) && order != *dflt)
335                 return 0;
336         if ((state&NUD_VALID) ||
337             (*last_idx<0 && order > *dflt)) {
338                 *last_resort = fi;
339                 *last_idx = order;
340         }
341         return 1;
342 }
343
344 #ifdef CONFIG_IP_ROUTE_MULTIPATH
345
346 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
347 {
348         while (RTA_OK(attr,attrlen)) {
349                 if (attr->rta_type == type)
350                         return *(u32*)RTA_DATA(attr);
351                 attr = RTA_NEXT(attr, attrlen);
352         }
353         return 0;
354 }
355
356 static int
357 fib_count_nexthops(struct rtattr *rta)
358 {
359         int nhs = 0;
360         struct rtnexthop *nhp = RTA_DATA(rta);
361         int nhlen = RTA_PAYLOAD(rta);
362
363         while (nhlen >= (int)sizeof(struct rtnexthop)) {
364                 if ((nhlen -= nhp->rtnh_len) < 0)
365                         return 0;
366                 nhs++;
367                 nhp = RTNH_NEXT(nhp);
368         };
369         return nhs;
370 }
371
372 static int
373 fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
374 {
375         struct rtnexthop *nhp = RTA_DATA(rta);
376         int nhlen = RTA_PAYLOAD(rta);
377
378         change_nexthops(fi) {
379                 int attrlen = nhlen - sizeof(struct rtnexthop);
380                 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
381                         return -EINVAL;
382                 nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
383                 nh->nh_oif = nhp->rtnh_ifindex;
384                 nh->nh_weight = nhp->rtnh_hops + 1;
385                 if (attrlen) {
386                         nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
387 #ifdef CONFIG_NET_CLS_ROUTE
388                         nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
389 #endif
390                 }
391                 nhp = RTNH_NEXT(nhp);
392         } endfor_nexthops(fi);
393         return 0;
394 }
395
396 #endif
397
398 int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
399                  struct fib_info *fi)
400 {
401 #ifdef CONFIG_IP_ROUTE_MULTIPATH
402         struct rtnexthop *nhp;
403         int nhlen;
404 #endif
405
406         if (rta->rta_priority &&
407             *rta->rta_priority != fi->fib_priority)
408                 return 1;
409
410         if (rta->rta_oif || rta->rta_gw) {
411                 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
412                     (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
413                         return 0;
414                 return 1;
415         }
416
417 #ifdef CONFIG_IP_ROUTE_MULTIPATH
418         if (rta->rta_mp == NULL)
419                 return 0;
420         nhp = RTA_DATA(rta->rta_mp);
421         nhlen = RTA_PAYLOAD(rta->rta_mp);
422         
423         for_nexthops(fi) {
424                 int attrlen = nhlen - sizeof(struct rtnexthop);
425                 u32 gw;
426
427                 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
428                         return -EINVAL;
429                 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
430                         return 1;
431                 if (attrlen) {
432                         gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
433                         if (gw && gw != nh->nh_gw)
434                                 return 1;
435 #ifdef CONFIG_NET_CLS_ROUTE
436                         gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
437                         if (gw && gw != nh->nh_tclassid)
438                                 return 1;
439 #endif
440                 }
441                 nhp = RTNH_NEXT(nhp);
442         } endfor_nexthops(fi);
443 #endif
444         return 0;
445 }
446
447
448 /*
449    Picture
450    -------
451
452    Semantics of nexthop are very messy for historical reasons.
453    We have to take into account, that:
454    a) gateway can be actually local interface address,
455       so that gatewayed route is direct.
456    b) gateway must be on-link address, possibly
457       described not by an ifaddr, but also by a direct route.
458    c) If both gateway and interface are specified, they should not
459       contradict.
460    d) If we use tunnel routes, gateway could be not on-link.
461
462    Attempt to reconcile all of these (alas, self-contradictory) conditions
463    results in pretty ugly and hairy code with obscure logic.
464
465    I chose to generalize it instead, so that the size
466    of code does not increase practically, but it becomes
467    much more general.
468    Every prefix is assigned a "scope" value: "host" is local address,
469    "link" is direct route,
470    [ ... "site" ... "interior" ... ]
471    and "universe" is true gateway route with global meaning.
472
473    Every prefix refers to a set of "nexthop"s (gw, oif),
474    where gw must have narrower scope. This recursion stops
475    when gw has LOCAL scope or if "nexthop" is declared ONLINK,
476    which means that gw is forced to be on link.
477
478    Code is still hairy, but now it is apparently logically
479    consistent and very flexible. E.g., as a by-product it allows
480    independent exterior and interior routing processes
481    to co-exist in peace.
482
483    Normally it looks as follows.
484
485    {universe prefix}  -> (gw, oif) [scope link]
486                           |
487                           |-> {link prefix} -> (gw, oif) [scope local]
488                                                 |
489                                                 |-> {local prefix} (terminal node)
490  */
491
/*
 * Validate nexthop @nh of @fi for the route described by @r and resolve
 * its output device and scope.
 *
 * On success (return 0) nh->nh_dev is set with a device reference held
 * (dev_hold) and nh->nh_scope is filled in, except for the
 * RTNH_F_PERVASIVE shortcut which returns 0 without touching @nh.
 * On failure a negative errno is returned.  Note that the gateway lookup
 * path can fail with -ENETDOWN after nh->nh_dev has already been set and
 * held; the reference stays with @nh (free_fib_info() drops the device
 * reference of every nexthop).
 */
492 static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
493 {
494         int err;
495
        /* Case 1: a gateway is given. */
496         if (nh->nh_gw) {
497                 struct fib_result res;
498
499 #ifdef CONFIG_IP_ROUTE_PERVASIVE
500                 if (nh->nh_flags&RTNH_F_PERVASIVE)
501                         return 0;
502 #endif
                /*
                 * ONLINK: the gateway is declared directly reachable on
                 * nh_oif, so no route lookup is done.  The route scope must
                 * be below RT_SCOPE_LINK, the gateway must be a unicast
                 * address and the device must exist and be up.
                 */
503                 if (nh->nh_flags&RTNH_F_ONLINK) {
504                         struct net_device *dev;
505
506                         if (r->rtm_scope >= RT_SCOPE_LINK)
507                                 return -EINVAL;
508                         if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
509                                 return -EINVAL;
510                         if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
511                                 return -ENODEV;
512                         if (!(dev->flags&IFF_UP))
513                                 return -ENETDOWN;
514                         nh->nh_dev = dev;
515                         dev_hold(dev);
516                         nh->nh_scope = RT_SCOPE_LINK;
517                         return 0;
518                 }
                /*
                 * Otherwise look the gateway up in the FIB, restricted to
                 * nh_oif and to a scope of at least rtm_scope + 1 (raised to
                 * RT_SCOPE_LINK if that would be lower).
                 */
519                 {
520                         struct flowi fl = { .nl_u = { .ip4_u =
521                                                       { .daddr = nh->nh_gw,
522                                                         .scope = r->rtm_scope + 1 } },
523                                             .oif = nh->nh_oif };
524
525                         /* It is not necessary, but requires a bit of thinking */
526                         if (fl.fl4_scope < RT_SCOPE_LINK)
527                                 fl.fl4_scope = RT_SCOPE_LINK;
528                         if ((err = fib_lookup(&fl, &res)) != 0)
529                                 return err;
530                 }
                /* The gateway must resolve to a unicast or local route. */
531                 err = -EINVAL;
532                 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
533                         goto out;
                /* Inherit scope, output interface and device from the lookup result. */
534                 nh->nh_scope = res.scope;
535                 nh->nh_oif = FIB_RES_OIF(res);
536                 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
537                         goto out;
538                 dev_hold(nh->nh_dev);
539                 err = -ENETDOWN;
540                 if (!(nh->nh_dev->flags & IFF_UP))
541                         goto out;
542                 err = 0;
543 out:
                /* Release the lookup result on every exit after a successful fib_lookup(). */
544                 fib_res_put(&res);
545                 return err;
546         } else {
                /*
                 * Case 2: no gateway.  PERVASIVE/ONLINK flags are rejected
                 * here; the device named by nh_oif must have an in_device
                 * and be up.
                 */
547                 struct in_device *in_dev;
548
549                 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
550                         return -EINVAL;
551
552                 in_dev = inetdev_by_index(nh->nh_oif);
553                 if (in_dev == NULL)
554                         return -ENODEV;
555                 if (!(in_dev->dev->flags&IFF_UP)) {
556                         in_dev_put(in_dev);
557                         return -ENETDOWN;
558                 }
559                 nh->nh_dev = in_dev->dev;
560                 dev_hold(nh->nh_dev);
561                 nh->nh_scope = RT_SCOPE_HOST;
                /* Only the net_device reference is kept; drop the in_device one. */
562                 in_dev_put(in_dev);
563         }
564         return 0;
565 }
566
567 static inline unsigned int fib_laddr_hashfn(u32 val)
568 {
569         unsigned int mask = (fib_hash_size - 1);
570
571         return (val ^ (val >> 7) ^ (val >> 14)) & mask;
572 }
573
574 static struct hlist_head *fib_hash_alloc(int bytes)
575 {
576         if (bytes <= PAGE_SIZE)
577                 return kmalloc(bytes, GFP_KERNEL);
578         else
579                 return (struct hlist_head *)
580                         __get_free_pages(GFP_KERNEL, get_order(bytes));
581 }
582
583 static void fib_hash_free(struct hlist_head *hash, int bytes)
584 {
585         if (!hash)
586                 return;
587
588         if (bytes <= PAGE_SIZE)
589                 kfree(hash);
590         else
591                 free_pages((unsigned long) hash, get_order(bytes));
592 }
593
594 static void fib_hash_move(struct hlist_head *new_info_hash,
595                           struct hlist_head *new_laddrhash,
596                           unsigned int new_size)
597 {
598         struct hlist_head *old_info_hash, *old_laddrhash;
599         unsigned int old_size = fib_hash_size;
600         unsigned int i, bytes;
601
602         write_lock_bh(&fib_info_lock);
603         old_info_hash = fib_info_hash;
604         old_laddrhash = fib_info_laddrhash;
605         fib_hash_size = new_size;
606
607         for (i = 0; i < old_size; i++) {
608                 struct hlist_head *head = &fib_info_hash[i];
609                 struct hlist_node *node, *n;
610                 struct fib_info *fi;
611
612                 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
613                         struct hlist_head *dest;
614                         unsigned int new_hash;
615
616                         hlist_del(&fi->fib_hash);
617
618                         new_hash = fib_info_hashfn(fi);
619                         dest = &new_info_hash[new_hash];
620                         hlist_add_head(&fi->fib_hash, dest);
621                 }
622         }
623         fib_info_hash = new_info_hash;
624
625         for (i = 0; i < old_size; i++) {
626                 struct hlist_head *lhead = &fib_info_laddrhash[i];
627                 struct hlist_node *node, *n;
628                 struct fib_info *fi;
629
630                 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
631                         struct hlist_head *ldest;
632                         unsigned int new_hash;
633
634                         hlist_del(&fi->fib_lhash);
635
636                         new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
637                         ldest = &new_laddrhash[new_hash];
638                         hlist_add_head(&fi->fib_lhash, ldest);
639                 }
640         }
641         fib_info_laddrhash = new_laddrhash;
642
643         write_unlock_bh(&fib_info_lock);
644
645         bytes = old_size * sizeof(struct hlist_head *);
646         fib_hash_free(old_info_hash, bytes);
647         fib_hash_free(old_laddrhash, bytes);
648 }
649
650 struct fib_info *
651 fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
652                 const struct nlmsghdr *nlh, int *errp)
653 {
654         int err;
655         struct fib_info *fi = NULL;
656         struct fib_info *ofi;
657 #ifdef CONFIG_IP_ROUTE_MULTIPATH
658         int nhs = 1;
659 #else
660         const int nhs = 1;
661 #endif
662 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
663         u32 mp_alg = IP_MP_ALG_NONE;
664 #endif
665
666         /* Fast check to catch the most weird cases */
667         if (fib_props[r->rtm_type].scope > r->rtm_scope)
668                 goto err_inval;
669
670 #ifdef CONFIG_IP_ROUTE_MULTIPATH
671         if (rta->rta_mp) {
672                 nhs = fib_count_nexthops(rta->rta_mp);
673                 if (nhs == 0)
674                         goto err_inval;
675         }
676 #endif
677 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
678         if (rta->rta_mp_alg) {
679                 mp_alg = *rta->rta_mp_alg;
680
681                 if (mp_alg < IP_MP_ALG_NONE ||
682                     mp_alg > IP_MP_ALG_MAX)
683                         goto err_inval;
684         }
685 #endif
686
687         err = -ENOBUFS;
688         if (fib_info_cnt >= fib_hash_size) {
689                 unsigned int new_size = fib_hash_size << 1;
690                 struct hlist_head *new_info_hash;
691                 struct hlist_head *new_laddrhash;
692                 unsigned int bytes;
693
694                 if (!new_size)
695                         new_size = 1;
696                 bytes = new_size * sizeof(struct hlist_head *);
697                 new_info_hash = fib_hash_alloc(bytes);
698                 new_laddrhash = fib_hash_alloc(bytes);
699                 if (!new_info_hash || !new_laddrhash) {
700                         fib_hash_free(new_info_hash, bytes);
701                         fib_hash_free(new_laddrhash, bytes);
702                 } else {
703                         memset(new_info_hash, 0, bytes);
704                         memset(new_laddrhash, 0, bytes);
705
706                         fib_hash_move(new_info_hash, new_laddrhash, new_size);
707                 }
708
709                 if (!fib_hash_size)
710                         goto failure;
711         }
712
713         fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
714         if (fi == NULL)
715                 goto failure;
716         fib_info_cnt++;
717         memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
718
719         fi->fib_protocol = r->rtm_protocol;
720
721         fi->fib_nhs = nhs;
722         change_nexthops(fi) {
723                 nh->nh_parent = fi;
724         } endfor_nexthops(fi)
725
726         fi->fib_flags = r->rtm_flags;
727         if (rta->rta_priority)
728                 fi->fib_priority = *rta->rta_priority;
729         if (rta->rta_mx) {
730                 int attrlen = RTA_PAYLOAD(rta->rta_mx);
731                 struct rtattr *attr = RTA_DATA(rta->rta_mx);
732
733                 while (RTA_OK(attr, attrlen)) {
734                         unsigned flavor = attr->rta_type;
735                         if (flavor) {
736                                 if (flavor > RTAX_MAX)
737                                         goto err_inval;
738                                 fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
739                         }
740                         attr = RTA_NEXT(attr, attrlen);
741                 }
742         }
743         if (rta->rta_prefsrc)
744                 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
745
746         if (rta->rta_mp) {
747 #ifdef CONFIG_IP_ROUTE_MULTIPATH
748                 if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
749                         goto failure;
750                 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
751                         goto err_inval;
752                 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
753                         goto err_inval;
754 #ifdef CONFIG_NET_CLS_ROUTE
755                 if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
756                         goto err_inval;
757 #endif
758 #else
759                 goto err_inval;
760 #endif
761         } else {
762                 struct fib_nh *nh = fi->fib_nh;
763                 if (rta->rta_oif)
764                         nh->nh_oif = *rta->rta_oif;
765                 if (rta->rta_gw)
766                         memcpy(&nh->nh_gw, rta->rta_gw, 4);
767 #ifdef CONFIG_NET_CLS_ROUTE
768                 if (rta->rta_flow)
769                         memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
770 #endif
771                 nh->nh_flags = r->rtm_flags;
772 #ifdef CONFIG_IP_ROUTE_MULTIPATH
773                 nh->nh_weight = 1;
774 #endif
775         }
776
777 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
778         fi->fib_mp_alg = mp_alg;
779 #endif
780
781         if (fib_props[r->rtm_type].error) {
782                 if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
783                         goto err_inval;
784                 goto link_it;
785         }
786
787         if (r->rtm_scope > RT_SCOPE_HOST)
788                 goto err_inval;
789
790         if (r->rtm_scope == RT_SCOPE_HOST) {
791                 struct fib_nh *nh = fi->fib_nh;
792
793                 /* Local address is added. */
794                 if (nhs != 1 || nh->nh_gw)
795                         goto err_inval;
796                 nh->nh_scope = RT_SCOPE_NOWHERE;
797                 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
798                 err = -ENODEV;
799                 if (nh->nh_dev == NULL)
800                         goto failure;
801         } else {
802                 change_nexthops(fi) {
803                         if ((err = fib_check_nh(r, fi, nh)) != 0)
804                                 goto failure;
805                 } endfor_nexthops(fi)
806         }
807
808         if (fi->fib_prefsrc) {
809                 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
810                     memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
811                         if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
812                                 goto err_inval;
813         }
814
815 link_it:
816         if ((ofi = fib_find_info(fi)) != NULL) {
817                 fi->fib_dead = 1;
818                 free_fib_info(fi);
819                 ofi->fib_treeref++;
820                 return ofi;
821         }
822
823         fi->fib_treeref++;
824         atomic_inc(&fi->fib_clntref);
825         write_lock_bh(&fib_info_lock);
826         hlist_add_head(&fi->fib_hash,
827                        &fib_info_hash[fib_info_hashfn(fi)]);
828         if (fi->fib_prefsrc) {
829                 struct hlist_head *head;
830
831                 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
832                 hlist_add_head(&fi->fib_lhash, head);
833         }
834         change_nexthops(fi) {
835                 struct hlist_head *head;
836                 unsigned int hash;
837
838                 if (!nh->nh_dev)
839                         continue;
840                 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
841                 head = &fib_info_devhash[hash];
842                 hlist_add_head(&nh->nh_hash, head);
843         } endfor_nexthops(fi)
844         write_unlock_bh(&fib_info_lock);
845         return fi;
846
847 err_inval:
848         err = -EINVAL;
849
850 failure:
851         *errp = err;
852         if (fi) {
853                 fi->fib_dead = 1;
854                 free_fib_info(fi);
855         }
856         return NULL;
857 }
858
859 /* Note! fib_semantic_match intentionally uses  RCU list functions. */
860 int fib_semantic_match(struct list_head *head, const struct flowi *flp,
861                        struct fib_result *res, __u32 zone, __u32 mask, 
862                         int prefixlen)
863 {
864         struct fib_alias *fa;
865         int nh_sel = 0;
866
867         list_for_each_entry_rcu(fa, head, fa_list) {
868                 int err;
869
870                 if (fa->fa_tos &&
871                     fa->fa_tos != flp->fl4_tos)
872                         continue;
873
874                 if (fa->fa_scope < flp->fl4_scope)
875                         continue;
876
877                 fa->fa_state |= FA_S_ACCESSED;
878
879                 err = fib_props[fa->fa_type].error;
880                 if (err == 0) {
881                         struct fib_info *fi = fa->fa_info;
882
883                         if (fi->fib_flags & RTNH_F_DEAD)
884                                 continue;
885
886                         switch (fa->fa_type) {
887                         case RTN_UNICAST:
888                         case RTN_LOCAL:
889                         case RTN_BROADCAST:
890                         case RTN_ANYCAST:
891                         case RTN_MULTICAST:
892                                 for_nexthops(fi) {
893                                         if (nh->nh_flags&RTNH_F_DEAD)
894                                                 continue;
895                                         if (!flp->oif || flp->oif == nh->nh_oif)
896                                                 break;
897                                 }
898 #ifdef CONFIG_IP_ROUTE_MULTIPATH
899                                 if (nhsel < fi->fib_nhs) {
900                                         nh_sel = nhsel;
901                                         goto out_fill_res;
902                                 }
903 #else
904                                 if (nhsel < 1) {
905                                         goto out_fill_res;
906                                 }
907 #endif
908                                 endfor_nexthops(fi);
909                                 continue;
910
911                         default:
912                                 printk(KERN_DEBUG "impossible 102\n");
913                                 return -EINVAL;
914                         };
915                 }
916                 return err;
917         }
918         return 1;
919
920 out_fill_res:
921         res->prefixlen = prefixlen;
922         res->nh_sel = nh_sel;
923         res->type = fa->fa_type;
924         res->scope = fa->fa_scope;
925         res->fi = fa->fa_info;
926 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
927         res->netmask = mask;
928         res->network = zone &
929                 (0xFFFFFFFF >> (32 - prefixlen));
930 #endif
931         atomic_inc(&res->fi->fib_clntref);
932         return 0;
933 }
934
935 /* Find appropriate source address to this destination */
936
937 u32 __fib_res_prefsrc(struct fib_result *res)
938 {
939         return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
940 }
941
942 int
943 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
944               u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
945               struct fib_info *fi, unsigned int flags)
946 {
947         struct rtmsg *rtm;
948         struct nlmsghdr  *nlh;
949         unsigned char    *b = skb->tail;
950
951         nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
952         rtm = NLMSG_DATA(nlh);
953         rtm->rtm_family = AF_INET;
954         rtm->rtm_dst_len = dst_len;
955         rtm->rtm_src_len = 0;
956         rtm->rtm_tos = tos;
957         rtm->rtm_table = tb_id;
958         rtm->rtm_type = type;
959         rtm->rtm_flags = fi->fib_flags;
960         rtm->rtm_scope = scope;
961         if (rtm->rtm_dst_len)
962                 RTA_PUT(skb, RTA_DST, 4, dst);
963         rtm->rtm_protocol = fi->fib_protocol;
964         if (fi->fib_priority)
965                 RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
966 #ifdef CONFIG_NET_CLS_ROUTE
967         if (fi->fib_nh[0].nh_tclassid)
968                 RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
969 #endif
970         if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
971                 goto rtattr_failure;
972         if (fi->fib_prefsrc)
973                 RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
974         if (fi->fib_nhs == 1) {
975                 if (fi->fib_nh->nh_gw)
976                         RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
977                 if (fi->fib_nh->nh_oif)
978                         RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
979         }
980 #ifdef CONFIG_IP_ROUTE_MULTIPATH
981         if (fi->fib_nhs > 1) {
982                 struct rtnexthop *nhp;
983                 struct rtattr *mp_head;
984                 if (skb_tailroom(skb) <= RTA_SPACE(0))
985                         goto rtattr_failure;
986                 mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
987
988                 for_nexthops(fi) {
989                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
990                                 goto rtattr_failure;
991                         nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
992                         nhp->rtnh_flags = nh->nh_flags & 0xFF;
993                         nhp->rtnh_hops = nh->nh_weight-1;
994                         nhp->rtnh_ifindex = nh->nh_oif;
995                         if (nh->nh_gw)
996                                 RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
997                         nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
998                 } endfor_nexthops(fi);
999                 mp_head->rta_type = RTA_MULTIPATH;
1000                 mp_head->rta_len = skb->tail - (u8*)mp_head;
1001         }
1002 #endif
1003         nlh->nlmsg_len = skb->tail - b;
1004         return skb->len;
1005
1006 nlmsg_failure:
1007 rtattr_failure:
1008         skb_trim(skb, b - skb->data);
1009         return -1;
1010 }
1011
1012 #ifndef CONFIG_IP_NOSIOCRT
1013
1014 int
1015 fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
1016                     struct kern_rta *rta, struct rtentry *r)
1017 {
1018         int    plen;
1019         u32    *ptr;
1020
1021         memset(rtm, 0, sizeof(*rtm));
1022         memset(rta, 0, sizeof(*rta));
1023
1024         if (r->rt_dst.sa_family != AF_INET)
1025                 return -EAFNOSUPPORT;
1026
1027         /* Check mask for validity:
1028            a) it must be contiguous.
1029            b) destination must have all host bits clear.
1030            c) if application forgot to set correct family (AF_INET),
1031               reject request unless it is absolutely clear i.e.
1032               both family and mask are zero.
1033          */
1034         plen = 32;
1035         ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
1036         if (!(r->rt_flags&RTF_HOST)) {
1037                 u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
1038                 if (r->rt_genmask.sa_family != AF_INET) {
1039                         if (mask || r->rt_genmask.sa_family)
1040                                 return -EAFNOSUPPORT;
1041                 }
1042                 if (bad_mask(mask, *ptr))
1043                         return -EINVAL;
1044                 plen = inet_mask_len(mask);
1045         }
1046
1047         nl->nlmsg_flags = NLM_F_REQUEST;
1048         nl->nlmsg_pid = 0;
1049         nl->nlmsg_seq = 0;
1050         nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
1051         if (cmd == SIOCDELRT) {
1052                 nl->nlmsg_type = RTM_DELROUTE;
1053                 nl->nlmsg_flags = 0;
1054         } else {
1055                 nl->nlmsg_type = RTM_NEWROUTE;
1056                 nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
1057                 rtm->rtm_protocol = RTPROT_BOOT;
1058         }
1059
1060         rtm->rtm_dst_len = plen;
1061         rta->rta_dst = ptr;
1062
1063         if (r->rt_metric) {
1064                 *(u32*)&r->rt_pad3 = r->rt_metric - 1;
1065                 rta->rta_priority = (u32*)&r->rt_pad3;
1066         }
1067         if (r->rt_flags&RTF_REJECT) {
1068                 rtm->rtm_scope = RT_SCOPE_HOST;
1069                 rtm->rtm_type = RTN_UNREACHABLE;
1070                 return 0;
1071         }
1072         rtm->rtm_scope = RT_SCOPE_NOWHERE;
1073         rtm->rtm_type = RTN_UNICAST;
1074
1075         if (r->rt_dev) {
1076                 char *colon;
1077                 struct net_device *dev;
1078                 char   devname[IFNAMSIZ];
1079
1080                 if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
1081                         return -EFAULT;
1082                 devname[IFNAMSIZ-1] = 0;
1083                 colon = strchr(devname, ':');
1084                 if (colon)
1085                         *colon = 0;
1086                 dev = __dev_get_by_name(devname);
1087                 if (!dev)
1088                         return -ENODEV;
1089                 rta->rta_oif = &dev->ifindex;
1090                 if (colon) {
1091                         struct in_ifaddr *ifa;
1092                         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1093                         if (!in_dev)
1094                                 return -ENODEV;
1095                         *colon = ':';
1096                         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
1097                                 if (strcmp(ifa->ifa_label, devname) == 0)
1098                                         break;
1099                         if (ifa == NULL)
1100                                 return -ENODEV;
1101                         rta->rta_prefsrc = &ifa->ifa_local;
1102                 }
1103         }
1104
1105         ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
1106         if (r->rt_gateway.sa_family == AF_INET && *ptr) {
1107                 rta->rta_gw = ptr;
1108                 if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
1109                         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1110         }
1111
1112         if (cmd == SIOCDELRT)
1113                 return 0;
1114
1115         if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
1116                 return -EINVAL;
1117
1118         if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
1119                 rtm->rtm_scope = RT_SCOPE_LINK;
1120
1121         if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
1122                 struct rtattr *rec;
1123                 struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
1124                 if (mx == NULL)
1125                         return -ENOMEM;
1126                 rta->rta_mx = mx;
1127                 mx->rta_type = RTA_METRICS;
1128                 mx->rta_len  = RTA_LENGTH(0);
1129                 if (r->rt_flags&RTF_MTU) {
1130                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1131                         rec->rta_type = RTAX_ADVMSS;
1132                         rec->rta_len = RTA_LENGTH(4);
1133                         mx->rta_len += RTA_LENGTH(4);
1134                         *(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
1135                 }
1136                 if (r->rt_flags&RTF_WINDOW) {
1137                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1138                         rec->rta_type = RTAX_WINDOW;
1139                         rec->rta_len = RTA_LENGTH(4);
1140                         mx->rta_len += RTA_LENGTH(4);
1141                         *(u32*)RTA_DATA(rec) = r->rt_window;
1142                 }
1143                 if (r->rt_flags&RTF_IRTT) {
1144                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1145                         rec->rta_type = RTAX_RTT;
1146                         rec->rta_len = RTA_LENGTH(4);
1147                         mx->rta_len += RTA_LENGTH(4);
1148                         *(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
1149                 }
1150         }
1151         return 0;
1152 }
1153
1154 #endif
1155
1156 /*
1157    Update FIB if:
1158    - local address disappeared -> we must delete all the entries
1159      referring to it.
1160    - device went down -> we must shutdown all nexthops going via it.
1161  */
1162
1163 int fib_sync_down(u32 local, struct net_device *dev, int force)
1164 {
1165         int ret = 0;
1166         int scope = RT_SCOPE_NOWHERE;
1167         
1168         if (force)
1169                 scope = -1;
1170
1171         if (local && fib_info_laddrhash) {
1172                 unsigned int hash = fib_laddr_hashfn(local);
1173                 struct hlist_head *head = &fib_info_laddrhash[hash];
1174                 struct hlist_node *node;
1175                 struct fib_info *fi;
1176
1177                 hlist_for_each_entry(fi, node, head, fib_lhash) {
1178                         if (fi->fib_prefsrc == local) {
1179                                 fi->fib_flags |= RTNH_F_DEAD;
1180                                 ret++;
1181                         }
1182                 }
1183         }
1184
1185         if (dev) {
1186                 struct fib_info *prev_fi = NULL;
1187                 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1188                 struct hlist_head *head = &fib_info_devhash[hash];
1189                 struct hlist_node *node;
1190                 struct fib_nh *nh;
1191
1192                 hlist_for_each_entry(nh, node, head, nh_hash) {
1193                         struct fib_info *fi = nh->nh_parent;
1194                         int dead;
1195
1196                         BUG_ON(!fi->fib_nhs);
1197                         if (nh->nh_dev != dev || fi == prev_fi)
1198                                 continue;
1199                         prev_fi = fi;
1200                         dead = 0;
1201                         change_nexthops(fi) {
1202                                 if (nh->nh_flags&RTNH_F_DEAD)
1203                                         dead++;
1204                                 else if (nh->nh_dev == dev &&
1205                                          nh->nh_scope != scope) {
1206                                         nh->nh_flags |= RTNH_F_DEAD;
1207 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1208                                         spin_lock_bh(&fib_multipath_lock);
1209                                         fi->fib_power -= nh->nh_power;
1210                                         nh->nh_power = 0;
1211                                         spin_unlock_bh(&fib_multipath_lock);
1212 #endif
1213                                         dead++;
1214                                 }
1215 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1216                                 if (force > 1 && nh->nh_dev == dev) {
1217                                         dead = fi->fib_nhs;
1218                                         break;
1219                                 }
1220 #endif
1221                         } endfor_nexthops(fi)
1222                         if (dead == fi->fib_nhs) {
1223                                 fi->fib_flags |= RTNH_F_DEAD;
1224                                 ret++;
1225                         }
1226                 }
1227         }
1228
1229         return ret;
1230 }
1231
1232 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1233
1234 /*
1235    Dead device goes up. We wake up dead nexthops.
1236    It takes sense only on multipath routes.
1237  */
1238
1239 int fib_sync_up(struct net_device *dev)
1240 {
1241         struct fib_info *prev_fi;
1242         unsigned int hash;
1243         struct hlist_head *head;
1244         struct hlist_node *node;
1245         struct fib_nh *nh;
1246         int ret;
1247
1248         if (!(dev->flags&IFF_UP))
1249                 return 0;
1250
1251         prev_fi = NULL;
1252         hash = fib_devindex_hashfn(dev->ifindex);
1253         head = &fib_info_devhash[hash];
1254         ret = 0;
1255
1256         hlist_for_each_entry(nh, node, head, nh_hash) {
1257                 struct fib_info *fi = nh->nh_parent;
1258                 int alive;
1259
1260                 BUG_ON(!fi->fib_nhs);
1261                 if (nh->nh_dev != dev || fi == prev_fi)
1262                         continue;
1263
1264                 prev_fi = fi;
1265                 alive = 0;
1266                 change_nexthops(fi) {
1267                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1268                                 alive++;
1269                                 continue;
1270                         }
1271                         if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1272                                 continue;
1273                         if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
1274                                 continue;
1275                         alive++;
1276                         spin_lock_bh(&fib_multipath_lock);
1277                         nh->nh_power = 0;
1278                         nh->nh_flags &= ~RTNH_F_DEAD;
1279                         spin_unlock_bh(&fib_multipath_lock);
1280                 } endfor_nexthops(fi)
1281
1282                 if (alive > 0) {
1283                         fi->fib_flags &= ~RTNH_F_DEAD;
1284                         ret++;
1285                 }
1286         }
1287
1288         return ret;
1289 }
1290
1291 /*
1292    The algorithm is suboptimal, but it provides really
1293    fair weighted route distribution.
1294  */
1295
1296 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1297 {
1298         struct fib_info *fi = res->fi;
1299         int w;
1300
1301         spin_lock_bh(&fib_multipath_lock);
1302         if (fi->fib_power <= 0) {
1303                 int power = 0;
1304                 change_nexthops(fi) {
1305                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1306                                 power += nh->nh_weight;
1307                                 nh->nh_power = nh->nh_weight;
1308                         }
1309                 } endfor_nexthops(fi);
1310                 fi->fib_power = power;
1311                 if (power <= 0) {
1312                         spin_unlock_bh(&fib_multipath_lock);
1313                         /* Race condition: route has just become dead. */
1314                         res->nh_sel = 0;
1315                         return;
1316                 }
1317         }
1318
1319
1320         /* w should be random number [0..fi->fib_power-1],
1321            it is pretty bad approximation.
1322          */
1323
1324         w = jiffies % fi->fib_power;
1325
1326         change_nexthops(fi) {
1327                 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1328                         if ((w -= nh->nh_power) <= 0) {
1329                                 nh->nh_power--;
1330                                 fi->fib_power--;
1331                                 res->nh_sel = nhsel;
1332                                 spin_unlock_bh(&fib_multipath_lock);
1333                                 return;
1334                         }
1335                 }
1336         } endfor_nexthops(fi);
1337
1338         /* Race condition: route has just become dead. */
1339         res->nh_sel = 0;
1340         spin_unlock_bh(&fib_multipath_lock);
1341 }
1342 #endif