vserver 2.0 rc7
[linux-2.6.git] / net / ipv4 / fib_semantics.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              IPv4 Forwarding Information Base: semantics.
7  *
8  * Version:     $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9  *
10  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  */
17
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/jiffies.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
38
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
45 #include <net/ip_mp_alg.h>
46
47 #include "fib_lookup.h"
48
49 #define FSprintk(a...)
50
51 static DEFINE_RWLOCK(fib_info_lock);
52 static struct hlist_head *fib_info_hash;
53 static struct hlist_head *fib_info_laddrhash;
54 static unsigned int fib_hash_size;
55 static unsigned int fib_info_cnt;
56
57 #define DEVINDEX_HASHBITS 8
58 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
59 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
60
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) open a brace scope that declares
 * the loop index "nhsel" and the cursor "nh" (const for for_nexthops,
 * writable for change_nexthops) and then start a for loop over
 * fi->fib_nh[0 .. fi->fib_nhs - 1].  The scope must be closed with
 * endfor_nexthops(fi).  "nhsel" stays visible between the end of the
 * loop body and endfor_nexthops().
 */
#define for_nexthops(fi) {						\
	int nhsel;							\
	const struct fib_nh * nh;					\
	for (nhsel = 0, nh = (fi)->fib_nh;				\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#define change_nexthops(fi) {						\
	int nhsel;							\
	struct fib_nh * nh;						\
	for (nhsel = 0, nh = (struct fib_nh*)((fi)->fib_nh);		\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath support only the first nexthop is visited: the loop
 * below runs exactly once and "nh" is fixed to fi->fib_nh.  The single
 * pass loop is kept so that both configurations share the same syntax;
 * the compiler is expected to optimise it away.
 */
#define for_nexthops(fi) {						\
	int nhsel = 0;							\
	const struct fib_nh * nh = (fi)->fib_nh;			\
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel = 0;							\
	struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh);		\
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the scope opened by for_nexthops() / change_nexthops(). */
#define endfor_nexthops(fi) }
84
85
86 static struct 
87 {
88         int     error;
89         u8      scope;
90 } fib_props[RTA_MAX + 1] = {
91         {
92                 .error  = 0,
93                 .scope  = RT_SCOPE_NOWHERE,
94         },      /* RTN_UNSPEC */
95         {
96                 .error  = 0,
97                 .scope  = RT_SCOPE_UNIVERSE,
98         },      /* RTN_UNICAST */
99         {
100                 .error  = 0,
101                 .scope  = RT_SCOPE_HOST,
102         },      /* RTN_LOCAL */
103         {
104                 .error  = 0,
105                 .scope  = RT_SCOPE_LINK,
106         },      /* RTN_BROADCAST */
107         {
108                 .error  = 0,
109                 .scope  = RT_SCOPE_LINK,
110         },      /* RTN_ANYCAST */
111         {
112                 .error  = 0,
113                 .scope  = RT_SCOPE_UNIVERSE,
114         },      /* RTN_MULTICAST */
115         {
116                 .error  = -EINVAL,
117                 .scope  = RT_SCOPE_UNIVERSE,
118         },      /* RTN_BLACKHOLE */
119         {
120                 .error  = -EHOSTUNREACH,
121                 .scope  = RT_SCOPE_UNIVERSE,
122         },      /* RTN_UNREACHABLE */
123         {
124                 .error  = -EACCES,
125                 .scope  = RT_SCOPE_UNIVERSE,
126         },      /* RTN_PROHIBIT */
127         {
128                 .error  = -EAGAIN,
129                 .scope  = RT_SCOPE_UNIVERSE,
130         },      /* RTN_THROW */
131         {
132                 .error  = -EINVAL,
133                 .scope  = RT_SCOPE_NOWHERE,
134         },      /* RTN_NAT */
135         {
136                 .error  = -EINVAL,
137                 .scope  = RT_SCOPE_NOWHERE,
138         },      /* RTN_XRESOLVE */
139 };
140
141
142 /* Release a nexthop info record */
143
144 void free_fib_info(struct fib_info *fi)
145 {
146         if (fi->fib_dead == 0) {
147                 printk("Freeing alive fib_info %p\n", fi);
148                 return;
149         }
150         change_nexthops(fi) {
151                 if (nh->nh_dev)
152                         dev_put(nh->nh_dev);
153                 nh->nh_dev = NULL;
154         } endfor_nexthops(fi);
155         fib_info_cnt--;
156         kfree(fi);
157 }
158
159 void fib_release_info(struct fib_info *fi)
160 {
161         write_lock(&fib_info_lock);
162         if (fi && --fi->fib_treeref == 0) {
163                 hlist_del(&fi->fib_hash);
164                 if (fi->fib_prefsrc)
165                         hlist_del(&fi->fib_lhash);
166                 change_nexthops(fi) {
167                         if (!nh->nh_dev)
168                                 continue;
169                         hlist_del(&nh->nh_hash);
170                 } endfor_nexthops(fi)
171                 fi->fib_dead = 1;
172                 fib_info_put(fi);
173         }
174         write_unlock(&fib_info_lock);
175 }
176
177 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
178 {
179         const struct fib_nh *onh = ofi->fib_nh;
180
181         for_nexthops(fi) {
182                 if (nh->nh_oif != onh->nh_oif ||
183                     nh->nh_gw  != onh->nh_gw ||
184                     nh->nh_scope != onh->nh_scope ||
185 #ifdef CONFIG_IP_ROUTE_MULTIPATH
186                     nh->nh_weight != onh->nh_weight ||
187 #endif
188 #ifdef CONFIG_NET_CLS_ROUTE
189                     nh->nh_tclassid != onh->nh_tclassid ||
190 #endif
191                     ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
192                         return -1;
193                 onh++;
194         } endfor_nexthops(fi);
195         return 0;
196 }
197
198 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
199 {
200         unsigned int mask = (fib_hash_size - 1);
201         unsigned int val = fi->fib_nhs;
202
203         val ^= fi->fib_protocol;
204         val ^= fi->fib_prefsrc;
205         val ^= fi->fib_priority;
206
207         return (val ^ (val >> 7) ^ (val >> 12)) & mask;
208 }
209
210 static struct fib_info *fib_find_info(const struct fib_info *nfi)
211 {
212         struct hlist_head *head;
213         struct hlist_node *node;
214         struct fib_info *fi;
215         unsigned int hash;
216
217         hash = fib_info_hashfn(nfi);
218         head = &fib_info_hash[hash];
219
220         hlist_for_each_entry(fi, node, head, fib_hash) {
221                 if (fi->fib_nhs != nfi->fib_nhs)
222                         continue;
223                 if (nfi->fib_protocol == fi->fib_protocol &&
224                     nfi->fib_prefsrc == fi->fib_prefsrc &&
225                     nfi->fib_priority == fi->fib_priority &&
226                     memcmp(nfi->fib_metrics, fi->fib_metrics,
227                            sizeof(fi->fib_metrics)) == 0 &&
228                     ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
229                     (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
230                         return fi;
231         }
232
233         return NULL;
234 }
235
236 static inline unsigned int fib_devindex_hashfn(unsigned int val)
237 {
238         unsigned int mask = DEVINDEX_HASHSIZE - 1;
239
240         return (val ^
241                 (val >> DEVINDEX_HASHBITS) ^
242                 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
243 }
244
245 /* Check, that the gateway is already configured.
246    Used only by redirect accept routine.
247  */
248
249 int ip_fib_check_default(u32 gw, struct net_device *dev)
250 {
251         struct hlist_head *head;
252         struct hlist_node *node;
253         struct fib_nh *nh;
254         unsigned int hash;
255
256         read_lock(&fib_info_lock);
257
258         hash = fib_devindex_hashfn(dev->ifindex);
259         head = &fib_info_devhash[hash];
260         hlist_for_each_entry(nh, node, head, nh_hash) {
261                 if (nh->nh_dev == dev &&
262                     nh->nh_gw == gw &&
263                     !(nh->nh_flags&RTNH_F_DEAD)) {
264                         read_unlock(&fib_info_lock);
265                         return 0;
266                 }
267         }
268
269         read_unlock(&fib_info_lock);
270
271         return -1;
272 }
273
274 void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
275                int z, int tb_id,
276                struct nlmsghdr *n, struct netlink_skb_parms *req)
277 {
278         struct sk_buff *skb;
279         u32 pid = req ? req->pid : 0;
280         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
281
282         skb = alloc_skb(size, GFP_KERNEL);
283         if (!skb)
284                 return;
285
286         if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
287                           fa->fa_type, fa->fa_scope, &key, z,
288                           fa->fa_tos,
289                           fa->fa_info) < 0) {
290                 kfree_skb(skb);
291                 return;
292         }
293         NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE;
294         if (n->nlmsg_flags&NLM_F_ECHO)
295                 atomic_inc(&skb->users);
296         netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL);
297         if (n->nlmsg_flags&NLM_F_ECHO)
298                 netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
299 }
300
301 /* Return the first fib alias matching TOS with
302  * priority less than or equal to PRIO.
303  */
304 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
305 {
306         if (fah) {
307                 struct fib_alias *fa;
308                 list_for_each_entry(fa, fah, fa_list) {
309                         if (fa->fa_tos > tos)
310                                 continue;
311                         if (fa->fa_info->fib_priority >= prio ||
312                             fa->fa_tos < tos)
313                                 return fa;
314                 }
315         }
316         return NULL;
317 }
318
319 int fib_detect_death(struct fib_info *fi, int order,
320                      struct fib_info **last_resort, int *last_idx, int *dflt)
321 {
322         struct neighbour *n;
323         int state = NUD_NONE;
324
325         n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
326         if (n) {
327                 state = n->nud_state;
328                 neigh_release(n);
329         }
330         if (state==NUD_REACHABLE)
331                 return 0;
332         if ((state&NUD_VALID) && order != *dflt)
333                 return 0;
334         if ((state&NUD_VALID) ||
335             (*last_idx<0 && order > *dflt)) {
336                 *last_resort = fi;
337                 *last_idx = order;
338         }
339         return 1;
340 }
341
342 #ifdef CONFIG_IP_ROUTE_MULTIPATH
343
344 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
345 {
346         while (RTA_OK(attr,attrlen)) {
347                 if (attr->rta_type == type)
348                         return *(u32*)RTA_DATA(attr);
349                 attr = RTA_NEXT(attr, attrlen);
350         }
351         return 0;
352 }
353
/*
 * Count the rtnexthop entries packed in the payload of attribute @rta.
 *
 * Returns the number of entries, or 0 if the payload is malformed:
 * an entry that claims to be shorter than its own header (this includes
 * rtnh_len == 0, which would otherwise never advance and loop forever)
 * or one that extends past the end of the payload.
 */
static int
fib_count_nexthops(struct rtattr *rta)
{
	int nhs = 0;
	struct rtnexthop *nhp = RTA_DATA(rta);
	int nhlen = RTA_PAYLOAD(rta);

	while (nhlen >= (int)sizeof(struct rtnexthop)) {
		/* An entry can never be smaller than its fixed header. */
		if (nhp->rtnh_len < sizeof(struct rtnexthop))
			return 0;
		if ((nhlen -= nhp->rtnh_len) < 0)
			return 0;
		nhs++;
		nhp = RTNH_NEXT(nhp);
	}
	return nhs;
}
369
370 static int
371 fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
372 {
373         struct rtnexthop *nhp = RTA_DATA(rta);
374         int nhlen = RTA_PAYLOAD(rta);
375
376         change_nexthops(fi) {
377                 int attrlen = nhlen - sizeof(struct rtnexthop);
378                 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
379                         return -EINVAL;
380                 nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
381                 nh->nh_oif = nhp->rtnh_ifindex;
382                 nh->nh_weight = nhp->rtnh_hops + 1;
383                 if (attrlen) {
384                         nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
385 #ifdef CONFIG_NET_CLS_ROUTE
386                         nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
387 #endif
388                 }
389                 nhp = RTNH_NEXT(nhp);
390         } endfor_nexthops(fi);
391         return 0;
392 }
393
394 #endif
395
396 int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
397                  struct fib_info *fi)
398 {
399 #ifdef CONFIG_IP_ROUTE_MULTIPATH
400         struct rtnexthop *nhp;
401         int nhlen;
402 #endif
403
404         if (rta->rta_priority &&
405             *rta->rta_priority != fi->fib_priority)
406                 return 1;
407
408         if (rta->rta_oif || rta->rta_gw) {
409                 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
410                     (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
411                         return 0;
412                 return 1;
413         }
414
415 #ifdef CONFIG_IP_ROUTE_MULTIPATH
416         if (rta->rta_mp == NULL)
417                 return 0;
418         nhp = RTA_DATA(rta->rta_mp);
419         nhlen = RTA_PAYLOAD(rta->rta_mp);
420         
421         for_nexthops(fi) {
422                 int attrlen = nhlen - sizeof(struct rtnexthop);
423                 u32 gw;
424
425                 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
426                         return -EINVAL;
427                 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
428                         return 1;
429                 if (attrlen) {
430                         gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
431                         if (gw && gw != nh->nh_gw)
432                                 return 1;
433 #ifdef CONFIG_NET_CLS_ROUTE
434                         gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
435                         if (gw && gw != nh->nh_tclassid)
436                                 return 1;
437 #endif
438                 }
439                 nhp = RTNH_NEXT(nhp);
440         } endfor_nexthops(fi);
441 #endif
442         return 0;
443 }
444
445
446 /*
447    Picture
448    -------
449
450    Semantics of nexthop are very messy for historical reasons.
451    We have to take into account, that:
452    a) gateway can be actually local interface address,
453       so that gatewayed route is direct.
454    b) gateway must be on-link address, possibly
455       described not by an ifaddr, but also by a direct route.
456    c) If both gateway and interface are specified, they should not
457       contradict.
458    d) If we use tunnel routes, gateway could be not on-link.
459
460    Attempt to reconcile all of these (alas, self-contradictory) conditions
461    results in pretty ugly and hairy code with obscure logic.
462
463    I chose to generalize it instead, so that the size
464    of code does not increase practically, but it becomes
465    much more general.
466    Every prefix is assigned a "scope" value: "host" is local address,
467    "link" is direct route,
468    [ ... "site" ... "interior" ... ]
469    and "universe" is true gateway route with global meaning.
470
471    Every prefix refers to a set of "nexthop"s (gw, oif),
472    where gw must have narrower scope. This recursion stops
473    when gw has LOCAL scope or if "nexthop" is declared ONLINK,
474    which means that gw is forced to be on link.
475
476    Code is still hairy, but now it is apparently logically
477    consistent and very flexible. E.g., as a by-product it allows
478    independent exterior and interior routing processes to
479    co-exist in peace.
480
481    Normally it looks as following.
482
483    {universe prefix}  -> (gw, oif) [scope link]
484                           |
485                           |-> {link prefix} -> (gw, oif) [scope local]
486                                                 |
487                                                 |-> {local prefix} (terminal node)
488  */
489
490 static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
491 {
492         int err;
493
494         if (nh->nh_gw) {
495                 struct fib_result res;
496
497 #ifdef CONFIG_IP_ROUTE_PERVASIVE
498                 if (nh->nh_flags&RTNH_F_PERVASIVE)
499                         return 0;
500 #endif
501                 if (nh->nh_flags&RTNH_F_ONLINK) {
502                         struct net_device *dev;
503
504                         if (r->rtm_scope >= RT_SCOPE_LINK)
505                                 return -EINVAL;
506                         if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
507                                 return -EINVAL;
508                         if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
509                                 return -ENODEV;
510                         if (!(dev->flags&IFF_UP))
511                                 return -ENETDOWN;
512                         nh->nh_dev = dev;
513                         dev_hold(dev);
514                         nh->nh_scope = RT_SCOPE_LINK;
515                         return 0;
516                 }
517                 {
518                         struct flowi fl = { .nl_u = { .ip4_u =
519                                                       { .daddr = nh->nh_gw,
520                                                         .scope = r->rtm_scope + 1 } },
521                                             .oif = nh->nh_oif };
522
523                         /* It is not necessary, but requires a bit of thinking */
524                         if (fl.fl4_scope < RT_SCOPE_LINK)
525                                 fl.fl4_scope = RT_SCOPE_LINK;
526                         if ((err = fib_lookup(&fl, &res)) != 0)
527                                 return err;
528                 }
529                 err = -EINVAL;
530                 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
531                         goto out;
532                 nh->nh_scope = res.scope;
533                 nh->nh_oif = FIB_RES_OIF(res);
534                 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
535                         goto out;
536                 dev_hold(nh->nh_dev);
537                 err = -ENETDOWN;
538                 if (!(nh->nh_dev->flags & IFF_UP))
539                         goto out;
540                 err = 0;
541 out:
542                 fib_res_put(&res);
543                 return err;
544         } else {
545                 struct in_device *in_dev;
546
547                 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
548                         return -EINVAL;
549
550                 in_dev = inetdev_by_index(nh->nh_oif);
551                 if (in_dev == NULL)
552                         return -ENODEV;
553                 if (!(in_dev->dev->flags&IFF_UP)) {
554                         in_dev_put(in_dev);
555                         return -ENETDOWN;
556                 }
557                 nh->nh_dev = in_dev->dev;
558                 dev_hold(nh->nh_dev);
559                 nh->nh_scope = RT_SCOPE_HOST;
560                 in_dev_put(in_dev);
561         }
562         return 0;
563 }
564
565 static inline unsigned int fib_laddr_hashfn(u32 val)
566 {
567         unsigned int mask = (fib_hash_size - 1);
568
569         return (val ^ (val >> 7) ^ (val >> 14)) & mask;
570 }
571
572 static struct hlist_head *fib_hash_alloc(int bytes)
573 {
574         if (bytes <= PAGE_SIZE)
575                 return kmalloc(bytes, GFP_KERNEL);
576         else
577                 return (struct hlist_head *)
578                         __get_free_pages(GFP_KERNEL, get_order(bytes));
579 }
580
581 static void fib_hash_free(struct hlist_head *hash, int bytes)
582 {
583         if (!hash)
584                 return;
585
586         if (bytes <= PAGE_SIZE)
587                 kfree(hash);
588         else
589                 free_pages((unsigned long) hash, get_order(bytes));
590 }
591
592 static void fib_hash_move(struct hlist_head *new_info_hash,
593                           struct hlist_head *new_laddrhash,
594                           unsigned int new_size)
595 {
596         unsigned int old_size = fib_hash_size;
597         unsigned int i;
598
599         write_lock(&fib_info_lock);
600         fib_hash_size = new_size;
601
602         for (i = 0; i < old_size; i++) {
603                 struct hlist_head *head = &fib_info_hash[i];
604                 struct hlist_node *node, *n;
605                 struct fib_info *fi;
606
607                 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
608                         struct hlist_head *dest;
609                         unsigned int new_hash;
610
611                         hlist_del(&fi->fib_hash);
612
613                         new_hash = fib_info_hashfn(fi);
614                         dest = &new_info_hash[new_hash];
615                         hlist_add_head(&fi->fib_hash, dest);
616                 }
617         }
618         fib_info_hash = new_info_hash;
619
620         for (i = 0; i < old_size; i++) {
621                 struct hlist_head *lhead = &fib_info_laddrhash[i];
622                 struct hlist_node *node, *n;
623                 struct fib_info *fi;
624
625                 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
626                         struct hlist_head *ldest;
627                         unsigned int new_hash;
628
629                         hlist_del(&fi->fib_lhash);
630
631                         new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
632                         ldest = &new_laddrhash[new_hash];
633                         hlist_add_head(&fi->fib_lhash, ldest);
634                 }
635         }
636         fib_info_laddrhash = new_laddrhash;
637
638         write_unlock(&fib_info_lock);
639 }
640
641 struct fib_info *
642 fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
643                 const struct nlmsghdr *nlh, int *errp)
644 {
645         int err;
646         struct fib_info *fi = NULL;
647         struct fib_info *ofi;
648 #ifdef CONFIG_IP_ROUTE_MULTIPATH
649         int nhs = 1;
650 #else
651         const int nhs = 1;
652 #endif
653 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
654         u32 mp_alg = IP_MP_ALG_NONE;
655 #endif
656
657         /* Fast check to catch the most weird cases */
658         if (fib_props[r->rtm_type].scope > r->rtm_scope)
659                 goto err_inval;
660
661 #ifdef CONFIG_IP_ROUTE_MULTIPATH
662         if (rta->rta_mp) {
663                 nhs = fib_count_nexthops(rta->rta_mp);
664                 if (nhs == 0)
665                         goto err_inval;
666         }
667 #endif
668 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
669         if (rta->rta_mp_alg) {
670                 mp_alg = *rta->rta_mp_alg;
671
672                 if (mp_alg < IP_MP_ALG_NONE ||
673                     mp_alg > IP_MP_ALG_MAX)
674                         goto err_inval;
675         }
676 #endif
677
678         err = -ENOBUFS;
679         if (fib_info_cnt >= fib_hash_size) {
680                 unsigned int new_size = fib_hash_size << 1;
681                 struct hlist_head *new_info_hash;
682                 struct hlist_head *new_laddrhash;
683                 unsigned int bytes;
684
685                 if (!new_size)
686                         new_size = 1;
687                 bytes = new_size * sizeof(struct hlist_head *);
688                 new_info_hash = fib_hash_alloc(bytes);
689                 new_laddrhash = fib_hash_alloc(bytes);
690                 if (!new_info_hash || !new_laddrhash) {
691                         fib_hash_free(new_info_hash, bytes);
692                         fib_hash_free(new_laddrhash, bytes);
693                 } else {
694                         memset(new_info_hash, 0, bytes);
695                         memset(new_laddrhash, 0, bytes);
696
697                         fib_hash_move(new_info_hash, new_laddrhash, new_size);
698                 }
699
700                 if (!fib_hash_size)
701                         goto failure;
702         }
703
704         fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
705         if (fi == NULL)
706                 goto failure;
707         fib_info_cnt++;
708         memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
709
710         fi->fib_protocol = r->rtm_protocol;
711
712         fi->fib_nhs = nhs;
713         change_nexthops(fi) {
714                 nh->nh_parent = fi;
715         } endfor_nexthops(fi)
716
717         fi->fib_flags = r->rtm_flags;
718         if (rta->rta_priority)
719                 fi->fib_priority = *rta->rta_priority;
720         if (rta->rta_mx) {
721                 int attrlen = RTA_PAYLOAD(rta->rta_mx);
722                 struct rtattr *attr = RTA_DATA(rta->rta_mx);
723
724                 while (RTA_OK(attr, attrlen)) {
725                         unsigned flavor = attr->rta_type;
726                         if (flavor) {
727                                 if (flavor > RTAX_MAX)
728                                         goto err_inval;
729                                 fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
730                         }
731                         attr = RTA_NEXT(attr, attrlen);
732                 }
733         }
734         if (rta->rta_prefsrc)
735                 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
736
737         if (rta->rta_mp) {
738 #ifdef CONFIG_IP_ROUTE_MULTIPATH
739                 if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
740                         goto failure;
741                 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
742                         goto err_inval;
743                 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
744                         goto err_inval;
745 #ifdef CONFIG_NET_CLS_ROUTE
746                 if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
747                         goto err_inval;
748 #endif
749 #else
750                 goto err_inval;
751 #endif
752         } else {
753                 struct fib_nh *nh = fi->fib_nh;
754                 if (rta->rta_oif)
755                         nh->nh_oif = *rta->rta_oif;
756                 if (rta->rta_gw)
757                         memcpy(&nh->nh_gw, rta->rta_gw, 4);
758 #ifdef CONFIG_NET_CLS_ROUTE
759                 if (rta->rta_flow)
760                         memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
761 #endif
762                 nh->nh_flags = r->rtm_flags;
763 #ifdef CONFIG_IP_ROUTE_MULTIPATH
764                 nh->nh_weight = 1;
765 #endif
766         }
767
768 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
769         fi->fib_mp_alg = mp_alg;
770 #endif
771
772         if (fib_props[r->rtm_type].error) {
773                 if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
774                         goto err_inval;
775                 goto link_it;
776         }
777
778         if (r->rtm_scope > RT_SCOPE_HOST)
779                 goto err_inval;
780
781         if (r->rtm_scope == RT_SCOPE_HOST) {
782                 struct fib_nh *nh = fi->fib_nh;
783
784                 /* Local address is added. */
785                 if (nhs != 1 || nh->nh_gw)
786                         goto err_inval;
787                 nh->nh_scope = RT_SCOPE_NOWHERE;
788                 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
789                 err = -ENODEV;
790                 if (nh->nh_dev == NULL)
791                         goto failure;
792         } else {
793                 change_nexthops(fi) {
794                         if ((err = fib_check_nh(r, fi, nh)) != 0)
795                                 goto failure;
796                 } endfor_nexthops(fi)
797         }
798
799         if (fi->fib_prefsrc) {
800                 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
801                     memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
802                         if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
803                                 goto err_inval;
804         }
805
806 link_it:
807         if ((ofi = fib_find_info(fi)) != NULL) {
808                 fi->fib_dead = 1;
809                 free_fib_info(fi);
810                 ofi->fib_treeref++;
811                 return ofi;
812         }
813
814         fi->fib_treeref++;
815         atomic_inc(&fi->fib_clntref);
816         write_lock(&fib_info_lock);
817         hlist_add_head(&fi->fib_hash,
818                        &fib_info_hash[fib_info_hashfn(fi)]);
819         if (fi->fib_prefsrc) {
820                 struct hlist_head *head;
821
822                 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
823                 hlist_add_head(&fi->fib_lhash, head);
824         }
825         change_nexthops(fi) {
826                 struct hlist_head *head;
827                 unsigned int hash;
828
829                 if (!nh->nh_dev)
830                         continue;
831                 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
832                 head = &fib_info_devhash[hash];
833                 hlist_add_head(&nh->nh_hash, head);
834         } endfor_nexthops(fi)
835         write_unlock(&fib_info_lock);
836         return fi;
837
838 err_inval:
839         err = -EINVAL;
840
841 failure:
842         *errp = err;
843         if (fi) {
844                 fi->fib_dead = 1;
845                 free_fib_info(fi);
846         }
847         return NULL;
848 }
849
850 int fib_semantic_match(struct list_head *head, const struct flowi *flp,
851                        struct fib_result *res, __u32 zone, __u32 mask, 
852                         int prefixlen)
853 {
854         struct fib_alias *fa;
855         int nh_sel = 0;
856
857         list_for_each_entry(fa, head, fa_list) {
858                 int err;
859
860                 if (fa->fa_tos &&
861                     fa->fa_tos != flp->fl4_tos)
862                         continue;
863
864                 if (fa->fa_scope < flp->fl4_scope)
865                         continue;
866
867                 fa->fa_state |= FA_S_ACCESSED;
868
869                 err = fib_props[fa->fa_type].error;
870                 if (err == 0) {
871                         struct fib_info *fi = fa->fa_info;
872
873                         if (fi->fib_flags & RTNH_F_DEAD)
874                                 continue;
875
876                         switch (fa->fa_type) {
877                         case RTN_UNICAST:
878                         case RTN_LOCAL:
879                         case RTN_BROADCAST:
880                         case RTN_ANYCAST:
881                         case RTN_MULTICAST:
882                                 for_nexthops(fi) {
883                                         if (nh->nh_flags&RTNH_F_DEAD)
884                                                 continue;
885                                         if (!flp->oif || flp->oif == nh->nh_oif)
886                                                 break;
887                                 }
888 #ifdef CONFIG_IP_ROUTE_MULTIPATH
889                                 if (nhsel < fi->fib_nhs) {
890                                         nh_sel = nhsel;
891                                         goto out_fill_res;
892                                 }
893 #else
894                                 if (nhsel < 1) {
895                                         goto out_fill_res;
896                                 }
897 #endif
898                                 endfor_nexthops(fi);
899                                 continue;
900
901                         default:
902                                 printk(KERN_DEBUG "impossible 102\n");
903                                 return -EINVAL;
904                         };
905                 }
906                 return err;
907         }
908         return 1;
909
910 out_fill_res:
911         res->prefixlen = prefixlen;
912         res->nh_sel = nh_sel;
913         res->type = fa->fa_type;
914         res->scope = fa->fa_scope;
915         res->fi = fa->fa_info;
916 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
917         res->netmask = mask;
918         res->network = zone &
919                 (0xFFFFFFFF >> (32 - prefixlen));
920 #endif
921         atomic_inc(&res->fi->fib_clntref);
922         return 0;
923 }
924
925 /* Find appropriate source address to this destination */
926
927 u32 __fib_res_prefsrc(struct fib_result *res)
928 {
929         return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
930 }
931
932 int
933 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
934               u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
935               struct fib_info *fi)
936 {
937         struct rtmsg *rtm;
938         struct nlmsghdr  *nlh;
939         unsigned char    *b = skb->tail;
940
941         nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
942         rtm = NLMSG_DATA(nlh);
943         rtm->rtm_family = AF_INET;
944         rtm->rtm_dst_len = dst_len;
945         rtm->rtm_src_len = 0;
946         rtm->rtm_tos = tos;
947         rtm->rtm_table = tb_id;
948         rtm->rtm_type = type;
949         rtm->rtm_flags = fi->fib_flags;
950         rtm->rtm_scope = scope;
951         if (rtm->rtm_dst_len)
952                 RTA_PUT(skb, RTA_DST, 4, dst);
953         rtm->rtm_protocol = fi->fib_protocol;
954         if (fi->fib_priority)
955                 RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
956 #ifdef CONFIG_NET_CLS_ROUTE
957         if (fi->fib_nh[0].nh_tclassid)
958                 RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
959 #endif
960         if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
961                 goto rtattr_failure;
962         if (fi->fib_prefsrc)
963                 RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
964         if (fi->fib_nhs == 1) {
965                 if (fi->fib_nh->nh_gw)
966                         RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
967                 if (fi->fib_nh->nh_oif)
968                         RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
969         }
970 #ifdef CONFIG_IP_ROUTE_MULTIPATH
971         if (fi->fib_nhs > 1) {
972                 struct rtnexthop *nhp;
973                 struct rtattr *mp_head;
974                 if (skb_tailroom(skb) <= RTA_SPACE(0))
975                         goto rtattr_failure;
976                 mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
977
978                 for_nexthops(fi) {
979                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
980                                 goto rtattr_failure;
981                         nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
982                         nhp->rtnh_flags = nh->nh_flags & 0xFF;
983                         nhp->rtnh_hops = nh->nh_weight-1;
984                         nhp->rtnh_ifindex = nh->nh_oif;
985                         if (nh->nh_gw)
986                                 RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
987                         nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
988                 } endfor_nexthops(fi);
989                 mp_head->rta_type = RTA_MULTIPATH;
990                 mp_head->rta_len = skb->tail - (u8*)mp_head;
991         }
992 #endif
993         nlh->nlmsg_len = skb->tail - b;
994         return skb->len;
995
996 nlmsg_failure:
997 rtattr_failure:
998         skb_trim(skb, b - skb->data);
999         return -1;
1000 }
1001
1002 #ifndef CONFIG_IP_NOSIOCRT
1003
1004 int
1005 fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
1006                     struct kern_rta *rta, struct rtentry *r)
1007 {
1008         int    plen;
1009         u32    *ptr;
1010
1011         memset(rtm, 0, sizeof(*rtm));
1012         memset(rta, 0, sizeof(*rta));
1013
1014         if (r->rt_dst.sa_family != AF_INET)
1015                 return -EAFNOSUPPORT;
1016
1017         /* Check mask for validity:
1018            a) it must be contiguous.
1019            b) destination must have all host bits clear.
1020            c) if application forgot to set correct family (AF_INET),
1021               reject request unless it is absolutely clear i.e.
1022               both family and mask are zero.
1023          */
1024         plen = 32;
1025         ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
1026         if (!(r->rt_flags&RTF_HOST)) {
1027                 u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
1028                 if (r->rt_genmask.sa_family != AF_INET) {
1029                         if (mask || r->rt_genmask.sa_family)
1030                                 return -EAFNOSUPPORT;
1031                 }
1032                 if (bad_mask(mask, *ptr))
1033                         return -EINVAL;
1034                 plen = inet_mask_len(mask);
1035         }
1036
1037         nl->nlmsg_flags = NLM_F_REQUEST;
1038         nl->nlmsg_pid = 0;
1039         nl->nlmsg_seq = 0;
1040         nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
1041         if (cmd == SIOCDELRT) {
1042                 nl->nlmsg_type = RTM_DELROUTE;
1043                 nl->nlmsg_flags = 0;
1044         } else {
1045                 nl->nlmsg_type = RTM_NEWROUTE;
1046                 nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
1047                 rtm->rtm_protocol = RTPROT_BOOT;
1048         }
1049
1050         rtm->rtm_dst_len = plen;
1051         rta->rta_dst = ptr;
1052
1053         if (r->rt_metric) {
1054                 *(u32*)&r->rt_pad3 = r->rt_metric - 1;
1055                 rta->rta_priority = (u32*)&r->rt_pad3;
1056         }
1057         if (r->rt_flags&RTF_REJECT) {
1058                 rtm->rtm_scope = RT_SCOPE_HOST;
1059                 rtm->rtm_type = RTN_UNREACHABLE;
1060                 return 0;
1061         }
1062         rtm->rtm_scope = RT_SCOPE_NOWHERE;
1063         rtm->rtm_type = RTN_UNICAST;
1064
1065         if (r->rt_dev) {
1066                 char *colon;
1067                 struct net_device *dev;
1068                 char   devname[IFNAMSIZ];
1069
1070                 if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
1071                         return -EFAULT;
1072                 devname[IFNAMSIZ-1] = 0;
1073                 colon = strchr(devname, ':');
1074                 if (colon)
1075                         *colon = 0;
1076                 dev = __dev_get_by_name(devname);
1077                 if (!dev)
1078                         return -ENODEV;
1079                 rta->rta_oif = &dev->ifindex;
1080                 if (colon) {
1081                         struct in_ifaddr *ifa;
1082                         struct in_device *in_dev = __in_dev_get(dev);
1083                         if (!in_dev)
1084                                 return -ENODEV;
1085                         *colon = ':';
1086                         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
1087                                 if (strcmp(ifa->ifa_label, devname) == 0)
1088                                         break;
1089                         if (ifa == NULL)
1090                                 return -ENODEV;
1091                         rta->rta_prefsrc = &ifa->ifa_local;
1092                 }
1093         }
1094
1095         ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
1096         if (r->rt_gateway.sa_family == AF_INET && *ptr) {
1097                 rta->rta_gw = ptr;
1098                 if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
1099                         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1100         }
1101
1102         if (cmd == SIOCDELRT)
1103                 return 0;
1104
1105         if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
1106                 return -EINVAL;
1107
1108         if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
1109                 rtm->rtm_scope = RT_SCOPE_LINK;
1110
1111         if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
1112                 struct rtattr *rec;
1113                 struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
1114                 if (mx == NULL)
1115                         return -ENOMEM;
1116                 rta->rta_mx = mx;
1117                 mx->rta_type = RTA_METRICS;
1118                 mx->rta_len  = RTA_LENGTH(0);
1119                 if (r->rt_flags&RTF_MTU) {
1120                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1121                         rec->rta_type = RTAX_ADVMSS;
1122                         rec->rta_len = RTA_LENGTH(4);
1123                         mx->rta_len += RTA_LENGTH(4);
1124                         *(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
1125                 }
1126                 if (r->rt_flags&RTF_WINDOW) {
1127                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1128                         rec->rta_type = RTAX_WINDOW;
1129                         rec->rta_len = RTA_LENGTH(4);
1130                         mx->rta_len += RTA_LENGTH(4);
1131                         *(u32*)RTA_DATA(rec) = r->rt_window;
1132                 }
1133                 if (r->rt_flags&RTF_IRTT) {
1134                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1135                         rec->rta_type = RTAX_RTT;
1136                         rec->rta_len = RTA_LENGTH(4);
1137                         mx->rta_len += RTA_LENGTH(4);
1138                         *(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
1139                 }
1140         }
1141         return 0;
1142 }
1143
1144 #endif
1145
1146 /*
1147    Update FIB if:
1148    - local address disappeared -> we must delete all the entries
1149      referring to it.
1150    - device went down -> we must shutdown all nexthops going via it.
1151  */
1152
1153 int fib_sync_down(u32 local, struct net_device *dev, int force)
1154 {
1155         int ret = 0;
1156         int scope = RT_SCOPE_NOWHERE;
1157         
1158         if (force)
1159                 scope = -1;
1160
1161         if (local && fib_info_laddrhash) {
1162                 unsigned int hash = fib_laddr_hashfn(local);
1163                 struct hlist_head *head = &fib_info_laddrhash[hash];
1164                 struct hlist_node *node;
1165                 struct fib_info *fi;
1166
1167                 hlist_for_each_entry(fi, node, head, fib_lhash) {
1168                         if (fi->fib_prefsrc == local) {
1169                                 fi->fib_flags |= RTNH_F_DEAD;
1170                                 ret++;
1171                         }
1172                 }
1173         }
1174
1175         if (dev) {
1176                 struct fib_info *prev_fi = NULL;
1177                 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1178                 struct hlist_head *head = &fib_info_devhash[hash];
1179                 struct hlist_node *node;
1180                 struct fib_nh *nh;
1181
1182                 hlist_for_each_entry(nh, node, head, nh_hash) {
1183                         struct fib_info *fi = nh->nh_parent;
1184                         int dead;
1185
1186                         BUG_ON(!fi->fib_nhs);
1187                         if (nh->nh_dev != dev || fi == prev_fi)
1188                                 continue;
1189                         prev_fi = fi;
1190                         dead = 0;
1191                         change_nexthops(fi) {
1192                                 if (nh->nh_flags&RTNH_F_DEAD)
1193                                         dead++;
1194                                 else if (nh->nh_dev == dev &&
1195                                          nh->nh_scope != scope) {
1196                                         nh->nh_flags |= RTNH_F_DEAD;
1197 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1198                                         spin_lock_bh(&fib_multipath_lock);
1199                                         fi->fib_power -= nh->nh_power;
1200                                         nh->nh_power = 0;
1201                                         spin_unlock_bh(&fib_multipath_lock);
1202 #endif
1203                                         dead++;
1204                                 }
1205 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1206                                 if (force > 1 && nh->nh_dev == dev) {
1207                                         dead = fi->fib_nhs;
1208                                         break;
1209                                 }
1210 #endif
1211                         } endfor_nexthops(fi)
1212                         if (dead == fi->fib_nhs) {
1213                                 fi->fib_flags |= RTNH_F_DEAD;
1214                                 ret++;
1215                         }
1216                 }
1217         }
1218
1219         return ret;
1220 }
1221
1222 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1223
1224 /*
1225    Dead device goes up. We wake up dead nexthops.
1226    It takes sense only on multipath routes.
1227  */
1228
1229 int fib_sync_up(struct net_device *dev)
1230 {
1231         struct fib_info *prev_fi;
1232         unsigned int hash;
1233         struct hlist_head *head;
1234         struct hlist_node *node;
1235         struct fib_nh *nh;
1236         int ret;
1237
1238         if (!(dev->flags&IFF_UP))
1239                 return 0;
1240
1241         prev_fi = NULL;
1242         hash = fib_devindex_hashfn(dev->ifindex);
1243         head = &fib_info_devhash[hash];
1244         ret = 0;
1245
1246         hlist_for_each_entry(nh, node, head, nh_hash) {
1247                 struct fib_info *fi = nh->nh_parent;
1248                 int alive;
1249
1250                 BUG_ON(!fi->fib_nhs);
1251                 if (nh->nh_dev != dev || fi == prev_fi)
1252                         continue;
1253
1254                 prev_fi = fi;
1255                 alive = 0;
1256                 change_nexthops(fi) {
1257                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1258                                 alive++;
1259                                 continue;
1260                         }
1261                         if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1262                                 continue;
1263                         if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
1264                                 continue;
1265                         alive++;
1266                         spin_lock_bh(&fib_multipath_lock);
1267                         nh->nh_power = 0;
1268                         nh->nh_flags &= ~RTNH_F_DEAD;
1269                         spin_unlock_bh(&fib_multipath_lock);
1270                 } endfor_nexthops(fi)
1271
1272                 if (alive > 0) {
1273                         fi->fib_flags &= ~RTNH_F_DEAD;
1274                         ret++;
1275                 }
1276         }
1277
1278         return ret;
1279 }
1280
1281 /*
1282    The algorithm is suboptimal, but it provides really
1283    fair weighted route distribution.
1284  */
1285
1286 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1287 {
1288         struct fib_info *fi = res->fi;
1289         int w;
1290
1291         spin_lock_bh(&fib_multipath_lock);
1292         if (fi->fib_power <= 0) {
1293                 int power = 0;
1294                 change_nexthops(fi) {
1295                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1296                                 power += nh->nh_weight;
1297                                 nh->nh_power = nh->nh_weight;
1298                         }
1299                 } endfor_nexthops(fi);
1300                 fi->fib_power = power;
1301                 if (power <= 0) {
1302                         spin_unlock_bh(&fib_multipath_lock);
1303                         /* Race condition: route has just become dead. */
1304                         res->nh_sel = 0;
1305                         return;
1306                 }
1307         }
1308
1309
1310         /* w should be random number [0..fi->fib_power-1],
1311            it is pretty bad approximation.
1312          */
1313
1314         w = jiffies % fi->fib_power;
1315
1316         change_nexthops(fi) {
1317                 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1318                         if ((w -= nh->nh_power) <= 0) {
1319                                 nh->nh_power--;
1320                                 fi->fib_power--;
1321                                 res->nh_sel = nhsel;
1322                                 spin_unlock_bh(&fib_multipath_lock);
1323                                 return;
1324                         }
1325                 }
1326         } endfor_nexthops(fi);
1327
1328         /* Race condition: route has just become dead. */
1329         res->nh_sel = 0;
1330         spin_unlock_bh(&fib_multipath_lock);
1331 }
1332 #endif