vserver 1.9.5.x5
[linux-2.6.git] / net / ipv4 / fib_semantics.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              IPv4 Forwarding Information Base: semantics.
7  *
8  * Version:     $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9  *
10  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  */
17
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/jiffies.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
38
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
45
46 #include "fib_lookup.h"
47
/* Debug trace hook: expands to nothing, so any FSprintk(...) call compiles away. */
48 #define FSprintk(a...)
49
/*
 * fib_info_lock is write-locked around every insertion into, removal from
 * and rehash of the hash tables below, and read-locked by
 * ip_fib_check_default() while it walks fib_info_devhash.
 */
50 static DEFINE_RWLOCK(fib_info_lock);
/* fib_info records, bucket chosen by fib_info_hashfn(); chained via fib_hash. */
51 static struct hlist_head *fib_info_hash;
/* fib_info records that have a fib_prefsrc, bucket chosen by fib_laddr_hashfn(); chained via fib_lhash. */
52 static struct hlist_head *fib_info_laddrhash;
/* Bucket count shared by the two tables above; used as (size - 1) mask, grown by doubling in fib_create_info(). */
53 static unsigned int fib_hash_size;
/* Number of allocated fib_info records: bumped in fib_create_info(), dropped in free_fib_info(). */
54 static unsigned int fib_info_cnt;
55
/* Fixed-size table of nexthops keyed by fib_devindex_hashfn(device ifindex); chained via nh_hash. */
56 #define DEVINDEX_HASHBITS 8
57 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
58 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
59
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) each OPEN a brace scope that
 * declares `nhsel` (nexthop index) and `nh` (current nexthop; const for
 * for_nexthops, writable for change_nexthops) and then start a `for` loop.
 * The statement or block that follows is the loop body, and the scope must
 * be closed afterwards with endfor_nexthops(fi), which expands to `}`.
 * Code placed between the loop body and endfor_nexthops() can therefore
 * still read `nhsel` and `nh`.
 */
60 #ifdef CONFIG_IP_ROUTE_MULTIPATH
61
/* NOTE(review): not referenced in the part of the file visible here. */
62 static DEFINE_SPINLOCK(fib_multipath_lock);
63
/* Multipath build: walk all fi->fib_nhs entries of fi->fib_nh[]. */
64 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
65 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
66
67 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
68 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
69
70 #else /* CONFIG_IP_ROUTE_MULTIPATH */
71
72 /* Single-path build: the loop runs exactly once over fi->fib_nh[0]; hopefully gcc optimizes the dummy loop away */
73
74 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
75 for (nhsel=0; nhsel < 1; nhsel++)
76
77 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
78 for (nhsel=0; nhsel < 1; nhsel++)
79
80 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
81
/* Closes the brace scope opened by for_nexthops()/change_nexthops(). */
82 #define endfor_nexthops(fi) }
83
84
85 static struct 
86 {
87         int     error;
88         u8      scope;
89 } fib_props[RTA_MAX + 1] = {
90         {
91                 .error  = 0,
92                 .scope  = RT_SCOPE_NOWHERE,
93         },      /* RTN_UNSPEC */
94         {
95                 .error  = 0,
96                 .scope  = RT_SCOPE_UNIVERSE,
97         },      /* RTN_UNICAST */
98         {
99                 .error  = 0,
100                 .scope  = RT_SCOPE_HOST,
101         },      /* RTN_LOCAL */
102         {
103                 .error  = 0,
104                 .scope  = RT_SCOPE_LINK,
105         },      /* RTN_BROADCAST */
106         {
107                 .error  = 0,
108                 .scope  = RT_SCOPE_LINK,
109         },      /* RTN_ANYCAST */
110         {
111                 .error  = 0,
112                 .scope  = RT_SCOPE_UNIVERSE,
113         },      /* RTN_MULTICAST */
114         {
115                 .error  = -EINVAL,
116                 .scope  = RT_SCOPE_UNIVERSE,
117         },      /* RTN_BLACKHOLE */
118         {
119                 .error  = -EHOSTUNREACH,
120                 .scope  = RT_SCOPE_UNIVERSE,
121         },      /* RTN_UNREACHABLE */
122         {
123                 .error  = -EACCES,
124                 .scope  = RT_SCOPE_UNIVERSE,
125         },      /* RTN_PROHIBIT */
126         {
127                 .error  = -EAGAIN,
128                 .scope  = RT_SCOPE_UNIVERSE,
129         },      /* RTN_THROW */
130         {
131                 .error  = -EINVAL,
132                 .scope  = RT_SCOPE_NOWHERE,
133         },      /* RTN_NAT */
134         {
135                 .error  = -EINVAL,
136                 .scope  = RT_SCOPE_NOWHERE,
137         },      /* RTN_XRESOLVE */
138 };
139
140
141 /* Release a nexthop info record */
142
143 void free_fib_info(struct fib_info *fi)
144 {
145         if (fi->fib_dead == 0) {
146                 printk("Freeing alive fib_info %p\n", fi);
147                 return;
148         }
149         change_nexthops(fi) {
150                 if (nh->nh_dev)
151                         dev_put(nh->nh_dev);
152                 nh->nh_dev = NULL;
153         } endfor_nexthops(fi);
154         fib_info_cnt--;
155         kfree(fi);
156 }
157
158 void fib_release_info(struct fib_info *fi)
159 {
160         write_lock(&fib_info_lock);
161         if (fi && --fi->fib_treeref == 0) {
162                 hlist_del(&fi->fib_hash);
163                 if (fi->fib_prefsrc)
164                         hlist_del(&fi->fib_lhash);
165                 change_nexthops(fi) {
166                         if (!nh->nh_dev)
167                                 continue;
168                         hlist_del(&nh->nh_hash);
169                 } endfor_nexthops(fi)
170                 fi->fib_dead = 1;
171                 fib_info_put(fi);
172         }
173         write_unlock(&fib_info_lock);
174 }
175
176 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
177 {
178         const struct fib_nh *onh = ofi->fib_nh;
179
180         for_nexthops(fi) {
181                 if (nh->nh_oif != onh->nh_oif ||
182                     nh->nh_gw  != onh->nh_gw ||
183                     nh->nh_scope != onh->nh_scope ||
184 #ifdef CONFIG_IP_ROUTE_MULTIPATH
185                     nh->nh_weight != onh->nh_weight ||
186 #endif
187 #ifdef CONFIG_NET_CLS_ROUTE
188                     nh->nh_tclassid != onh->nh_tclassid ||
189 #endif
190                     ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
191                         return -1;
192                 onh++;
193         } endfor_nexthops(fi);
194         return 0;
195 }
196
197 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
198 {
199         unsigned int mask = (fib_hash_size - 1);
200         unsigned int val = fi->fib_nhs;
201
202         val ^= fi->fib_protocol;
203         val ^= fi->fib_prefsrc;
204         val ^= fi->fib_priority;
205
206         return (val ^ (val >> 7) ^ (val >> 12)) & mask;
207 }
208
209 static struct fib_info *fib_find_info(const struct fib_info *nfi)
210 {
211         struct hlist_head *head;
212         struct hlist_node *node;
213         struct fib_info *fi;
214         unsigned int hash;
215
216         hash = fib_info_hashfn(nfi);
217         head = &fib_info_hash[hash];
218
219         hlist_for_each_entry(fi, node, head, fib_hash) {
220                 if (fi->fib_nhs != nfi->fib_nhs)
221                         continue;
222                 if (nfi->fib_protocol == fi->fib_protocol &&
223                     nfi->fib_prefsrc == fi->fib_prefsrc &&
224                     nfi->fib_priority == fi->fib_priority &&
225                     memcmp(nfi->fib_metrics, fi->fib_metrics,
226                            sizeof(fi->fib_metrics)) == 0 &&
227                     ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
228                     (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
229                         return fi;
230         }
231
232         return NULL;
233 }
234
235 static inline unsigned int fib_devindex_hashfn(unsigned int val)
236 {
237         unsigned int mask = DEVINDEX_HASHSIZE - 1;
238
239         return (val ^
240                 (val >> DEVINDEX_HASHBITS) ^
241                 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
242 }
243
244 /* Check, that the gateway is already configured.
245    Used only by redirect accept routine.
246  */
247
248 int ip_fib_check_default(u32 gw, struct net_device *dev)
249 {
250         struct hlist_head *head;
251         struct hlist_node *node;
252         struct fib_nh *nh;
253         unsigned int hash;
254
255         read_lock(&fib_info_lock);
256
257         hash = fib_devindex_hashfn(dev->ifindex);
258         head = &fib_info_devhash[hash];
259         hlist_for_each_entry(nh, node, head, nh_hash) {
260                 if (nh->nh_dev == dev &&
261                     nh->nh_gw == gw &&
262                     !(nh->nh_flags&RTNH_F_DEAD)) {
263                         read_unlock(&fib_info_lock);
264                         return 0;
265                 }
266         }
267
268         read_unlock(&fib_info_lock);
269
270         return -1;
271 }
272
273 void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
274                int z, int tb_id,
275                struct nlmsghdr *n, struct netlink_skb_parms *req)
276 {
277         struct sk_buff *skb;
278         u32 pid = req ? req->pid : 0;
279         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
280
281         skb = alloc_skb(size, GFP_KERNEL);
282         if (!skb)
283                 return;
284
285         if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
286                           fa->fa_type, fa->fa_scope, &key, z,
287                           fa->fa_tos,
288                           fa->fa_info) < 0) {
289                 kfree_skb(skb);
290                 return;
291         }
292         NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE;
293         if (n->nlmsg_flags&NLM_F_ECHO)
294                 atomic_inc(&skb->users);
295         netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL);
296         if (n->nlmsg_flags&NLM_F_ECHO)
297                 netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
298 }
299
300 /* Return the first fib alias matching TOS with
301  * priority less than or equal to PRIO.
302  */
303 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
304 {
305         if (fah) {
306                 struct fib_alias *fa;
307                 list_for_each_entry(fa, fah, fa_list) {
308                         if (fa->fa_tos > tos)
309                                 continue;
310                         if (fa->fa_info->fib_priority >= prio ||
311                             fa->fa_tos < tos)
312                                 return fa;
313                 }
314         }
315         return NULL;
316 }
317
318 int fib_detect_death(struct fib_info *fi, int order,
319                      struct fib_info **last_resort, int *last_idx, int *dflt)
320 {
321         struct neighbour *n;
322         int state = NUD_NONE;
323
324         n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
325         if (n) {
326                 state = n->nud_state;
327                 neigh_release(n);
328         }
329         if (state==NUD_REACHABLE)
330                 return 0;
331         if ((state&NUD_VALID) && order != *dflt)
332                 return 0;
333         if ((state&NUD_VALID) ||
334             (*last_idx<0 && order > *dflt)) {
335                 *last_resort = fi;
336                 *last_idx = order;
337         }
338         return 1;
339 }
340
341 #ifdef CONFIG_IP_ROUTE_MULTIPATH
342
343 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
344 {
345         while (RTA_OK(attr,attrlen)) {
346                 if (attr->rta_type == type)
347                         return *(u32*)RTA_DATA(attr);
348                 attr = RTA_NEXT(attr, attrlen);
349         }
350         return 0;
351 }
352
353 static int
354 fib_count_nexthops(struct rtattr *rta)
355 {
356         int nhs = 0;
357         struct rtnexthop *nhp = RTA_DATA(rta);
358         int nhlen = RTA_PAYLOAD(rta);
359
360         while (nhlen >= (int)sizeof(struct rtnexthop)) {
361                 if ((nhlen -= nhp->rtnh_len) < 0)
362                         return 0;
363                 nhs++;
364                 nhp = RTNH_NEXT(nhp);
365         };
366         return nhs;
367 }
368
369 static int
370 fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
371 {
372         struct rtnexthop *nhp = RTA_DATA(rta);
373         int nhlen = RTA_PAYLOAD(rta);
374
375         change_nexthops(fi) {
376                 int attrlen = nhlen - sizeof(struct rtnexthop);
377                 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
378                         return -EINVAL;
379                 nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
380                 nh->nh_oif = nhp->rtnh_ifindex;
381                 nh->nh_weight = nhp->rtnh_hops + 1;
382                 if (attrlen) {
383                         nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
384 #ifdef CONFIG_NET_CLS_ROUTE
385                         nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
386 #endif
387                 }
388                 nhp = RTNH_NEXT(nhp);
389         } endfor_nexthops(fi);
390         return 0;
391 }
392
393 #endif
394
395 int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
396                  struct fib_info *fi)
397 {
398 #ifdef CONFIG_IP_ROUTE_MULTIPATH
399         struct rtnexthop *nhp;
400         int nhlen;
401 #endif
402
403         if (rta->rta_priority &&
404             *rta->rta_priority != fi->fib_priority)
405                 return 1;
406
407         if (rta->rta_oif || rta->rta_gw) {
408                 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
409                     (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
410                         return 0;
411                 return 1;
412         }
413
414 #ifdef CONFIG_IP_ROUTE_MULTIPATH
415         if (rta->rta_mp == NULL)
416                 return 0;
417         nhp = RTA_DATA(rta->rta_mp);
418         nhlen = RTA_PAYLOAD(rta->rta_mp);
419         
420         for_nexthops(fi) {
421                 int attrlen = nhlen - sizeof(struct rtnexthop);
422                 u32 gw;
423
424                 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
425                         return -EINVAL;
426                 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
427                         return 1;
428                 if (attrlen) {
429                         gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
430                         if (gw && gw != nh->nh_gw)
431                                 return 1;
432 #ifdef CONFIG_NET_CLS_ROUTE
433                         gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
434                         if (gw && gw != nh->nh_tclassid)
435                                 return 1;
436 #endif
437                 }
438                 nhp = RTNH_NEXT(nhp);
439         } endfor_nexthops(fi);
440 #endif
441         return 0;
442 }
443
444
445 /*
446    Picture
447    -------
448
449    Semantics of nexthop are very messy for historical reasons.
450    We have to take into account, that:
451    a) gateway can be actually local interface address,
452       so that gatewayed route is direct.
453    b) gateway must be on-link address, possibly
454       described not by an ifaddr, but also by a direct route.
455    c) If both gateway and interface are specified, they should not
456       contradict.
457    d) If we use tunnel routes, gateway could be not on-link.
458
459    Attempt to reconcile all of these (alas, self-contradictory) conditions
460    results in pretty ugly and hairy code with obscure logic.
461
462    I chose to generalize it instead, so that the size
463    of the code practically does not increase, but it becomes
464    much more general.
465    Every prefix is assigned a "scope" value: "host" is local address,
466    "link" is direct route,
467    [ ... "site" ... "interior" ... ]
468    and "universe" is true gateway route with global meaning.
469
470    Every prefix refers to a set of "nexthop"s (gw, oif),
471    where gw must have narrower scope. This recursion stops
472    when gw has LOCAL scope or if "nexthop" is declared ONLINK,
473    which means that gw is forced to be on link.
474
475    Code is still hairy, but now it is apparently logically
476    consistent and very flexible. E.g. as a by-product it allows
477    independent exterior and interior routing processes
478    to co-exist in peace.
479
480    Normally it looks as following.
481
482    {universe prefix}  -> (gw, oif) [scope link]
483                           |
484                           |-> {link prefix} -> (gw, oif) [scope local]
485                                                 |
486                                                 |-> {local prefix} (terminal node)
487  */
488
/*
 * Validate one nexthop of a route that is being created and bind it to an
 * output device.
 *
 * r:  route message header; only rtm_scope is read here
 * fi: the fib_info that owns the nexthop (not referenced in this function)
 * nh: nexthop to check; on success nh_dev (with a reference taken through
 *     dev_hold) and nh_scope are filled in, and for a looked-up gateway
 *     nh_oif is overwritten with the interface of the lookup result
 *
 * Returns 0 on success, otherwise a negative errno: -EINVAL, -ENODEV,
 * -ENETDOWN, or whatever fib_lookup() returned.
 */
489 static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
490 {
491         int err;
492
            /* Case 1: the nexthop names a gateway. */
493         if (nh->nh_gw) {
494                 struct fib_result res;
495
496 #ifdef CONFIG_IP_ROUTE_PERVASIVE
                    /* Pervasive nexthops are accepted without any further check. */
497                 if (nh->nh_flags&RTNH_F_PERVASIVE)
498                         return 0;
499 #endif
                    /*
                     * ONLINK: no route lookup for the gateway is done; it is
                     * bound directly to the device given by nh_oif.
                     */
500                 if (nh->nh_flags&RTNH_F_ONLINK) {
501                         struct net_device *dev;
502
                            /* only routes with a scope value below RT_SCOPE_LINK may use ONLINK */
503                         if (r->rtm_scope >= RT_SCOPE_LINK)
504                                 return -EINVAL;
                            /* the gateway address must classify as a unicast address */
505                         if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
506                                 return -EINVAL;
507                         if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
508                                 return -ENODEV;
509                         if (!(dev->flags&IFF_UP))
510                                 return -ENETDOWN;
511                         nh->nh_dev = dev;
512                         dev_hold(dev);
513                         nh->nh_scope = RT_SCOPE_LINK;
514                         return 0;
515                 }
                    /*
                     * Ordinary gateway: look the gateway address up in the FIB,
                     * asking for scope rtm_scope + 1 but never less than
                     * RT_SCOPE_LINK, optionally restricted to nh_oif.
                     */
516                 {
517                         struct flowi fl = { .nl_u = { .ip4_u =
518                                                       { .daddr = nh->nh_gw,
519                                                         .scope = r->rtm_scope + 1 } },
520                                             .oif = nh->nh_oif };
521
522                         /* It is not necessary, but requires a bit of thinking */
523                         if (fl.fl4_scope < RT_SCOPE_LINK)
524                                 fl.fl4_scope = RT_SCOPE_LINK;
525                         if ((err = fib_lookup(&fl, &res)) != 0)
526                                 return err;
527                 }
                    /* From here on the lookup result must be released through `out`. */
528                 err = -EINVAL;
                    /* the gateway must resolve to a unicast or local route */
529                 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
530                         goto out;
                    /* the nexthop inherits scope, interface and device from the result */
531                 nh->nh_scope = res.scope;
532                 nh->nh_oif = FIB_RES_OIF(res);
533                 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
534                         goto out;
535                 dev_hold(nh->nh_dev);
                    /*
                     * If the device is down the reference taken above stays in
                     * nh->nh_dev; free_fib_info() drops it when the caller
                     * discards the fib_info.
                     */
536                 err = -ENETDOWN;
537                 if (!(nh->nh_dev->flags & IFF_UP))
538                         goto out;
539                 err = 0;
540 out:
541                 fib_res_put(&res);
542                 return err;
543         } else {
                    /* Case 2: no gateway, the nexthop is just an output interface. */
544                 struct in_device *in_dev;
545
                    /* PERVASIVE and ONLINK are only meaningful together with a gateway */
546                 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
547                         return -EINVAL;
548
549                 in_dev = inetdev_by_index(nh->nh_oif);
550                 if (in_dev == NULL)
551                         return -ENODEV;
552                 if (!(in_dev->dev->flags&IFF_UP)) {
553                         in_dev_put(in_dev);
554                         return -ENETDOWN;
555                 }
                    /* keep a device reference of our own, then release the in_device */
556                 nh->nh_dev = in_dev->dev;
557                 dev_hold(nh->nh_dev);
558                 nh->nh_scope = RT_SCOPE_HOST;
559                 in_dev_put(in_dev);
560         }
561         return 0;
562 }
563
564 static inline unsigned int fib_laddr_hashfn(u32 val)
565 {
566         unsigned int mask = (fib_hash_size - 1);
567
568         return (val ^ (val >> 7) ^ (val >> 14)) & mask;
569 }
570
571 static struct hlist_head *fib_hash_alloc(int bytes)
572 {
573         if (bytes <= PAGE_SIZE)
574                 return kmalloc(bytes, GFP_KERNEL);
575         else
576                 return (struct hlist_head *)
577                         __get_free_pages(GFP_KERNEL, get_order(bytes));
578 }
579
580 static void fib_hash_free(struct hlist_head *hash, int bytes)
581 {
582         if (!hash)
583                 return;
584
585         if (bytes <= PAGE_SIZE)
586                 kfree(hash);
587         else
588                 free_pages((unsigned long) hash, get_order(bytes));
589 }
590
591 static void fib_hash_move(struct hlist_head *new_info_hash,
592                           struct hlist_head *new_laddrhash,
593                           unsigned int new_size)
594 {
595         unsigned int old_size = fib_hash_size;
596         unsigned int i;
597
598         write_lock(&fib_info_lock);
599         fib_hash_size = new_size;
600
601         for (i = 0; i < old_size; i++) {
602                 struct hlist_head *head = &fib_info_hash[i];
603                 struct hlist_node *node, *n;
604                 struct fib_info *fi;
605
606                 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
607                         struct hlist_head *dest;
608                         unsigned int new_hash;
609
610                         hlist_del(&fi->fib_hash);
611
612                         new_hash = fib_info_hashfn(fi);
613                         dest = &new_info_hash[new_hash];
614                         hlist_add_head(&fi->fib_hash, dest);
615                 }
616         }
617         fib_info_hash = new_info_hash;
618
619         for (i = 0; i < old_size; i++) {
620                 struct hlist_head *lhead = &fib_info_laddrhash[i];
621                 struct hlist_node *node, *n;
622                 struct fib_info *fi;
623
624                 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
625                         struct hlist_head *ldest;
626                         unsigned int new_hash;
627
628                         hlist_del(&fi->fib_lhash);
629
630                         new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
631                         ldest = &new_laddrhash[new_hash];
632                         hlist_add_head(&fi->fib_lhash, ldest);
633                 }
634         }
635         fib_info_laddrhash = new_laddrhash;
636
637         write_unlock(&fib_info_lock);
638 }
639
640 struct fib_info *
641 fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
642                 const struct nlmsghdr *nlh, int *errp)
643 {
644         int err;
645         struct fib_info *fi = NULL;
646         struct fib_info *ofi;
647 #ifdef CONFIG_IP_ROUTE_MULTIPATH
648         int nhs = 1;
649 #else
650         const int nhs = 1;
651 #endif
652
653         /* Fast check to catch the most weird cases */
654         if (fib_props[r->rtm_type].scope > r->rtm_scope)
655                 goto err_inval;
656
657 #ifdef CONFIG_IP_ROUTE_MULTIPATH
658         if (rta->rta_mp) {
659                 nhs = fib_count_nexthops(rta->rta_mp);
660                 if (nhs == 0)
661                         goto err_inval;
662         }
663 #endif
664
665         err = -ENOBUFS;
666         if (fib_info_cnt >= fib_hash_size) {
667                 unsigned int new_size = fib_hash_size << 1;
668                 struct hlist_head *new_info_hash;
669                 struct hlist_head *new_laddrhash;
670                 unsigned int bytes;
671
672                 if (!new_size)
673                         new_size = 1;
674                 bytes = new_size * sizeof(struct hlist_head *);
675                 new_info_hash = fib_hash_alloc(bytes);
676                 new_laddrhash = fib_hash_alloc(bytes);
677                 if (!new_info_hash || !new_laddrhash) {
678                         fib_hash_free(new_info_hash, bytes);
679                         fib_hash_free(new_laddrhash, bytes);
680                 } else {
681                         memset(new_info_hash, 0, bytes);
682                         memset(new_laddrhash, 0, bytes);
683
684                         fib_hash_move(new_info_hash, new_laddrhash, new_size);
685                 }
686
687                 if (!fib_hash_size)
688                         goto failure;
689         }
690
691         fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
692         if (fi == NULL)
693                 goto failure;
694         fib_info_cnt++;
695         memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
696
697         fi->fib_protocol = r->rtm_protocol;
698
699         fi->fib_nhs = nhs;
700         change_nexthops(fi) {
701                 nh->nh_parent = fi;
702         } endfor_nexthops(fi)
703
704         fi->fib_flags = r->rtm_flags;
705         if (rta->rta_priority)
706                 fi->fib_priority = *rta->rta_priority;
707         if (rta->rta_mx) {
708                 int attrlen = RTA_PAYLOAD(rta->rta_mx);
709                 struct rtattr *attr = RTA_DATA(rta->rta_mx);
710
711                 while (RTA_OK(attr, attrlen)) {
712                         unsigned flavor = attr->rta_type;
713                         if (flavor) {
714                                 if (flavor > RTAX_MAX)
715                                         goto err_inval;
716                                 fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
717                         }
718                         attr = RTA_NEXT(attr, attrlen);
719                 }
720         }
721         if (rta->rta_prefsrc)
722                 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
723
724         if (rta->rta_mp) {
725 #ifdef CONFIG_IP_ROUTE_MULTIPATH
726                 if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
727                         goto failure;
728                 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
729                         goto err_inval;
730                 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
731                         goto err_inval;
732 #ifdef CONFIG_NET_CLS_ROUTE
733                 if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
734                         goto err_inval;
735 #endif
736 #else
737                 goto err_inval;
738 #endif
739         } else {
740                 struct fib_nh *nh = fi->fib_nh;
741                 if (rta->rta_oif)
742                         nh->nh_oif = *rta->rta_oif;
743                 if (rta->rta_gw)
744                         memcpy(&nh->nh_gw, rta->rta_gw, 4);
745 #ifdef CONFIG_NET_CLS_ROUTE
746                 if (rta->rta_flow)
747                         memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
748 #endif
749                 nh->nh_flags = r->rtm_flags;
750 #ifdef CONFIG_IP_ROUTE_MULTIPATH
751                 nh->nh_weight = 1;
752 #endif
753         }
754
755         if (fib_props[r->rtm_type].error) {
756                 if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
757                         goto err_inval;
758                 goto link_it;
759         }
760
761         if (r->rtm_scope > RT_SCOPE_HOST)
762                 goto err_inval;
763
764         if (r->rtm_scope == RT_SCOPE_HOST) {
765                 struct fib_nh *nh = fi->fib_nh;
766
767                 /* Local address is added. */
768                 if (nhs != 1 || nh->nh_gw)
769                         goto err_inval;
770                 nh->nh_scope = RT_SCOPE_NOWHERE;
771                 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
772                 err = -ENODEV;
773                 if (nh->nh_dev == NULL)
774                         goto failure;
775         } else {
776                 change_nexthops(fi) {
777                         if ((err = fib_check_nh(r, fi, nh)) != 0)
778                                 goto failure;
779                 } endfor_nexthops(fi)
780         }
781
782         if (fi->fib_prefsrc) {
783                 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
784                     memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
785                         if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
786                                 goto err_inval;
787         }
788
789 link_it:
790         if ((ofi = fib_find_info(fi)) != NULL) {
791                 fi->fib_dead = 1;
792                 free_fib_info(fi);
793                 ofi->fib_treeref++;
794                 return ofi;
795         }
796
797         fi->fib_treeref++;
798         atomic_inc(&fi->fib_clntref);
799         write_lock(&fib_info_lock);
800         hlist_add_head(&fi->fib_hash,
801                        &fib_info_hash[fib_info_hashfn(fi)]);
802         if (fi->fib_prefsrc) {
803                 struct hlist_head *head;
804
805                 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
806                 hlist_add_head(&fi->fib_lhash, head);
807         }
808         change_nexthops(fi) {
809                 struct hlist_head *head;
810                 unsigned int hash;
811
812                 if (!nh->nh_dev)
813                         continue;
814                 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
815                 head = &fib_info_devhash[hash];
816                 hlist_add_head(&nh->nh_hash, head);
817         } endfor_nexthops(fi)
818         write_unlock(&fib_info_lock);
819         return fi;
820
821 err_inval:
822         err = -EINVAL;
823
824 failure:
825         *errp = err;
826         if (fi) {
827                 fi->fib_dead = 1;
828                 free_fib_info(fi);
829         }
830         return NULL;
831 }
832
/*
 * Walk the alias list `head` and resolve flow `flp` against the first
 * alias that applies.
 *
 * Returns:
 *   0       a live nexthop was found; *res is filled in and the
 *           fib_clntref of the chosen fib_info has been incremented
 *   1       no alias in the list applies
 *   < 0     the fib_props[] error of the first applicable alias whose
 *           route type carries an error, or -EINVAL for a route type the
 *           switch below does not know
 */
833 int fib_semantic_match(struct list_head *head, const struct flowi *flp,
834                        struct fib_result *res, int prefixlen)
835 {
836         struct fib_alias *fa;
            /* index of the chosen nexthop; stays 0 in single-path builds */
837         int nh_sel = 0;
838
839         list_for_each_entry(fa, head, fa_list) {
840                 int err;
841
                    /* an alias with a non-zero TOS only applies to flows with that TOS */
842                 if (fa->fa_tos &&
843                     fa->fa_tos != flp->fl4_tos)
844                         continue;
845
                    /* skip aliases whose scope value is below the one the flow asks for */
846                 if (fa->fa_scope < flp->fl4_scope)
847                         continue;
848
849                 fa->fa_state |= FA_S_ACCESSED;
850
851                 err = fib_props[fa->fa_type].error;
852                 if (err == 0) {
853                         struct fib_info *fi = fa->fa_info;
854
855                         if (fi->fib_flags & RTNH_F_DEAD)
856                                 continue;
857
858                         switch (fa->fa_type) {
859                         case RTN_UNICAST:
860                         case RTN_LOCAL:
861                         case RTN_BROADCAST:
862                         case RTN_ANYCAST:
863                         case RTN_MULTICAST:
                                    /*
                                     * Stop at the first nexthop that is not dead and
                                     * matches the requested output interface (any
                                     * interface if flp->oif is 0).
                                     */
864                                 for_nexthops(fi) {
865                                         if (nh->nh_flags&RTNH_F_DEAD)
866                                                 continue;
867                                         if (!flp->oif || flp->oif == nh->nh_oif)
868                                                 break;
869                                 }
                                    /*
                                     * Still inside the scope opened by for_nexthops(),
                                     * so nhsel is visible: it is below the nexthop
                                     * count only if the loop ended through `break`.
                                     */
870 #ifdef CONFIG_IP_ROUTE_MULTIPATH
871                                 if (nhsel < fi->fib_nhs) {
872                                         nh_sel = nhsel;
873                                         goto out_fill_res;
874                                 }
875 #else
876                                 if (nhsel < 1) {
877                                         goto out_fill_res;
878                                 }
879 #endif
880                                 endfor_nexthops(fi);
                                    /* no usable nexthop in this alias: try the next one */
881                                 continue;
882
883                         default:
884                                 printk(KERN_DEBUG "impossible 102\n");
885                                 return -EINVAL;
886                         };
887                 }
                    /* route type with an error code: report it to the caller */
888                 return err;
889         }
890         return 1;
891
892 out_fill_res:
893         res->prefixlen = prefixlen;
894         res->nh_sel = nh_sel;
895         res->type = fa->fa_type;
896         res->scope = fa->fa_scope;
897         res->fi = fa->fa_info;
898         atomic_inc(&res->fi->fib_clntref);
899         return 0;
900 }
901
902 /* Find appropriate source address to this destination */
903
904 u32 __fib_res_prefsrc(struct fib_result *res)
905 {
906         return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
907 }
908
909 int
910 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
911               u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
912               struct fib_info *fi)
913 {
914         struct rtmsg *rtm;
915         struct nlmsghdr  *nlh;
916         unsigned char    *b = skb->tail;
917
918         nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
919         rtm = NLMSG_DATA(nlh);
920         rtm->rtm_family = AF_INET;
921         rtm->rtm_dst_len = dst_len;
922         rtm->rtm_src_len = 0;
923         rtm->rtm_tos = tos;
924         rtm->rtm_table = tb_id;
925         rtm->rtm_type = type;
926         rtm->rtm_flags = fi->fib_flags;
927         rtm->rtm_scope = scope;
928         if (rtm->rtm_dst_len)
929                 RTA_PUT(skb, RTA_DST, 4, dst);
930         rtm->rtm_protocol = fi->fib_protocol;
931         if (fi->fib_priority)
932                 RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
933 #ifdef CONFIG_NET_CLS_ROUTE
934         if (fi->fib_nh[0].nh_tclassid)
935                 RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
936 #endif
937         if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
938                 goto rtattr_failure;
939         if (fi->fib_prefsrc)
940                 RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
941         if (fi->fib_nhs == 1) {
942                 if (fi->fib_nh->nh_gw)
943                         RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
944                 if (fi->fib_nh->nh_oif)
945                         RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
946         }
947 #ifdef CONFIG_IP_ROUTE_MULTIPATH
948         if (fi->fib_nhs > 1) {
949                 struct rtnexthop *nhp;
950                 struct rtattr *mp_head;
951                 if (skb_tailroom(skb) <= RTA_SPACE(0))
952                         goto rtattr_failure;
953                 mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
954
955                 for_nexthops(fi) {
956                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
957                                 goto rtattr_failure;
958                         nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
959                         nhp->rtnh_flags = nh->nh_flags & 0xFF;
960                         nhp->rtnh_hops = nh->nh_weight-1;
961                         nhp->rtnh_ifindex = nh->nh_oif;
962                         if (nh->nh_gw)
963                                 RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
964                         nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
965                 } endfor_nexthops(fi);
966                 mp_head->rta_type = RTA_MULTIPATH;
967                 mp_head->rta_len = skb->tail - (u8*)mp_head;
968         }
969 #endif
970         nlh->nlmsg_len = skb->tail - b;
971         return skb->len;
972
973 nlmsg_failure:
974 rtattr_failure:
975         skb_trim(skb, b - skb->data);
976         return -1;
977 }
978
979 #ifndef CONFIG_IP_NOSIOCRT
980
981 int
982 fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
983                     struct kern_rta *rta, struct rtentry *r)
984 {
985         int    plen;
986         u32    *ptr;
987
988         memset(rtm, 0, sizeof(*rtm));
989         memset(rta, 0, sizeof(*rta));
990
991         if (r->rt_dst.sa_family != AF_INET)
992                 return -EAFNOSUPPORT;
993
994         /* Check mask for validity:
995            a) it must be contiguous.
996            b) destination must have all host bits clear.
997            c) if application forgot to set correct family (AF_INET),
998               reject request unless it is absolutely clear i.e.
999               both family and mask are zero.
1000          */
1001         plen = 32;
1002         ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
1003         if (!(r->rt_flags&RTF_HOST)) {
1004                 u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
1005                 if (r->rt_genmask.sa_family != AF_INET) {
1006                         if (mask || r->rt_genmask.sa_family)
1007                                 return -EAFNOSUPPORT;
1008                 }
1009                 if (bad_mask(mask, *ptr))
1010                         return -EINVAL;
1011                 plen = inet_mask_len(mask);
1012         }
1013
1014         nl->nlmsg_flags = NLM_F_REQUEST;
1015         nl->nlmsg_pid = 0;
1016         nl->nlmsg_seq = 0;
1017         nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
1018         if (cmd == SIOCDELRT) {
1019                 nl->nlmsg_type = RTM_DELROUTE;
1020                 nl->nlmsg_flags = 0;
1021         } else {
1022                 nl->nlmsg_type = RTM_NEWROUTE;
1023                 nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
1024                 rtm->rtm_protocol = RTPROT_BOOT;
1025         }
1026
1027         rtm->rtm_dst_len = plen;
1028         rta->rta_dst = ptr;
1029
1030         if (r->rt_metric) {
1031                 *(u32*)&r->rt_pad3 = r->rt_metric - 1;
1032                 rta->rta_priority = (u32*)&r->rt_pad3;
1033         }
1034         if (r->rt_flags&RTF_REJECT) {
1035                 rtm->rtm_scope = RT_SCOPE_HOST;
1036                 rtm->rtm_type = RTN_UNREACHABLE;
1037                 return 0;
1038         }
1039         rtm->rtm_scope = RT_SCOPE_NOWHERE;
1040         rtm->rtm_type = RTN_UNICAST;
1041
1042         if (r->rt_dev) {
1043                 char *colon;
1044                 struct net_device *dev;
1045                 char   devname[IFNAMSIZ];
1046
1047                 if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
1048                         return -EFAULT;
1049                 devname[IFNAMSIZ-1] = 0;
1050                 colon = strchr(devname, ':');
1051                 if (colon)
1052                         *colon = 0;
1053                 dev = __dev_get_by_name(devname);
1054                 if (!dev)
1055                         return -ENODEV;
1056                 rta->rta_oif = &dev->ifindex;
1057                 if (colon) {
1058                         struct in_ifaddr *ifa;
1059                         struct in_device *in_dev = __in_dev_get(dev);
1060                         if (!in_dev)
1061                                 return -ENODEV;
1062                         *colon = ':';
1063                         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
1064                                 if (strcmp(ifa->ifa_label, devname) == 0)
1065                                         break;
1066                         if (ifa == NULL)
1067                                 return -ENODEV;
1068                         rta->rta_prefsrc = &ifa->ifa_local;
1069                 }
1070         }
1071
1072         ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
1073         if (r->rt_gateway.sa_family == AF_INET && *ptr) {
1074                 rta->rta_gw = ptr;
1075                 if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
1076                         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1077         }
1078
1079         if (cmd == SIOCDELRT)
1080                 return 0;
1081
1082         if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
1083                 return -EINVAL;
1084
1085         if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
1086                 rtm->rtm_scope = RT_SCOPE_LINK;
1087
1088         if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
1089                 struct rtattr *rec;
1090                 struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
1091                 if (mx == NULL)
1092                         return -ENOMEM;
1093                 rta->rta_mx = mx;
1094                 mx->rta_type = RTA_METRICS;
1095                 mx->rta_len  = RTA_LENGTH(0);
1096                 if (r->rt_flags&RTF_MTU) {
1097                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1098                         rec->rta_type = RTAX_ADVMSS;
1099                         rec->rta_len = RTA_LENGTH(4);
1100                         mx->rta_len += RTA_LENGTH(4);
1101                         *(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
1102                 }
1103                 if (r->rt_flags&RTF_WINDOW) {
1104                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1105                         rec->rta_type = RTAX_WINDOW;
1106                         rec->rta_len = RTA_LENGTH(4);
1107                         mx->rta_len += RTA_LENGTH(4);
1108                         *(u32*)RTA_DATA(rec) = r->rt_window;
1109                 }
1110                 if (r->rt_flags&RTF_IRTT) {
1111                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1112                         rec->rta_type = RTAX_RTT;
1113                         rec->rta_len = RTA_LENGTH(4);
1114                         mx->rta_len += RTA_LENGTH(4);
1115                         *(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
1116                 }
1117         }
1118         return 0;
1119 }
1120
1121 #endif
1122
1123 /*
1124    Update FIB if:
1125    - local address disappeared -> we must delete all the entries
1126      referring to it.
1127    - device went down -> we must shutdown all nexthops going via it.
1128  */
1129
1130 int fib_sync_down(u32 local, struct net_device *dev, int force)
1131 {
1132         int ret = 0;
1133         int scope = RT_SCOPE_NOWHERE;
1134         
1135         if (force)
1136                 scope = -1;
1137
1138         if (local && fib_info_laddrhash) {
1139                 unsigned int hash = fib_laddr_hashfn(local);
1140                 struct hlist_head *head = &fib_info_laddrhash[hash];
1141                 struct hlist_node *node;
1142                 struct fib_info *fi;
1143
1144                 hlist_for_each_entry(fi, node, head, fib_lhash) {
1145                         if (fi->fib_prefsrc == local) {
1146                                 fi->fib_flags |= RTNH_F_DEAD;
1147                                 ret++;
1148                         }
1149                 }
1150         }
1151
1152         if (dev) {
1153                 struct fib_info *prev_fi = NULL;
1154                 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1155                 struct hlist_head *head = &fib_info_devhash[hash];
1156                 struct hlist_node *node;
1157                 struct fib_nh *nh;
1158
1159                 hlist_for_each_entry(nh, node, head, nh_hash) {
1160                         struct fib_info *fi = nh->nh_parent;
1161                         int dead;
1162
1163                         BUG_ON(!fi->fib_nhs);
1164                         if (nh->nh_dev != dev || fi == prev_fi)
1165                                 continue;
1166                         prev_fi = fi;
1167                         dead = 0;
1168                         change_nexthops(fi) {
1169                                 if (nh->nh_flags&RTNH_F_DEAD)
1170                                         dead++;
1171                                 else if (nh->nh_dev == dev &&
1172                                          nh->nh_scope != scope) {
1173                                         nh->nh_flags |= RTNH_F_DEAD;
1174 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1175                                         spin_lock_bh(&fib_multipath_lock);
1176                                         fi->fib_power -= nh->nh_power;
1177                                         nh->nh_power = 0;
1178                                         spin_unlock_bh(&fib_multipath_lock);
1179 #endif
1180                                         dead++;
1181                                 }
1182 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1183                                 if (force > 1 && nh->nh_dev == dev) {
1184                                         dead = fi->fib_nhs;
1185                                         break;
1186                                 }
1187 #endif
1188                         } endfor_nexthops(fi)
1189                         if (dead == fi->fib_nhs) {
1190                                 fi->fib_flags |= RTNH_F_DEAD;
1191                                 ret++;
1192                         }
1193                 }
1194         }
1195
1196         return ret;
1197 }
1198
1199 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1200
1201 /*
1202    Dead device goes up. We wake up dead nexthops.
1203    It takes sense only on multipath routes.
1204  */
1205
1206 int fib_sync_up(struct net_device *dev)
1207 {
1208         struct fib_info *prev_fi;
1209         unsigned int hash;
1210         struct hlist_head *head;
1211         struct hlist_node *node;
1212         struct fib_nh *nh;
1213         int ret;
1214
1215         if (!(dev->flags&IFF_UP))
1216                 return 0;
1217
1218         prev_fi = NULL;
1219         hash = fib_devindex_hashfn(dev->ifindex);
1220         head = &fib_info_devhash[hash];
1221         ret = 0;
1222
1223         hlist_for_each_entry(nh, node, head, nh_hash) {
1224                 struct fib_info *fi = nh->nh_parent;
1225                 int alive;
1226
1227                 BUG_ON(!fi->fib_nhs);
1228                 if (nh->nh_dev != dev || fi == prev_fi)
1229                         continue;
1230
1231                 prev_fi = fi;
1232                 alive = 0;
1233                 change_nexthops(fi) {
1234                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1235                                 alive++;
1236                                 continue;
1237                         }
1238                         if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1239                                 continue;
1240                         if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
1241                                 continue;
1242                         alive++;
1243                         spin_lock_bh(&fib_multipath_lock);
1244                         nh->nh_power = 0;
1245                         nh->nh_flags &= ~RTNH_F_DEAD;
1246                         spin_unlock_bh(&fib_multipath_lock);
1247                 } endfor_nexthops(fi)
1248
1249                 if (alive > 0) {
1250                         fi->fib_flags &= ~RTNH_F_DEAD;
1251                         ret++;
1252                 }
1253         }
1254
1255         return ret;
1256 }
1257
1258 /*
1259    The algorithm is suboptimal, but it provides really
1260    fair weighted route distribution.
1261  */
1262
1263 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1264 {
1265         struct fib_info *fi = res->fi;
1266         int w;
1267
1268         spin_lock_bh(&fib_multipath_lock);
1269         if (fi->fib_power <= 0) {
1270                 int power = 0;
1271                 change_nexthops(fi) {
1272                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1273                                 power += nh->nh_weight;
1274                                 nh->nh_power = nh->nh_weight;
1275                         }
1276                 } endfor_nexthops(fi);
1277                 fi->fib_power = power;
1278                 if (power <= 0) {
1279                         spin_unlock_bh(&fib_multipath_lock);
1280                         /* Race condition: route has just become dead. */
1281                         res->nh_sel = 0;
1282                         return;
1283                 }
1284         }
1285
1286
1287         /* w should be random number [0..fi->fib_power-1],
1288            it is pretty bad approximation.
1289          */
1290
1291         w = jiffies % fi->fib_power;
1292
1293         change_nexthops(fi) {
1294                 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1295                         if ((w -= nh->nh_power) <= 0) {
1296                                 nh->nh_power--;
1297                                 fi->fib_power--;
1298                                 res->nh_sel = nhsel;
1299                                 spin_unlock_bh(&fib_multipath_lock);
1300                                 return;
1301                         }
1302                 }
1303         } endfor_nexthops(fi);
1304
1305         /* Race condition: route has just become dead. */
1306         res->nh_sel = 0;
1307         spin_unlock_bh(&fib_multipath_lock);
1308 }
1309 #endif