vserver 1.9.5.x5
[linux-2.6.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
40
41 #ifdef  CONFIG_PROC_FS
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #endif
45
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56
57 #include <asm/uaccess.h>
58
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62
/*
 * Debug verbosity for this file: with RT6_DEBUG >= 3 both RDBG() and
 * RT6_TRACE() expand to printk calls, otherwise they expand to nothing.
 */
63 /* Set to 3 to get tracing. */
64 #define RT6_DEBUG 2
65
66 #if RT6_DEBUG >= 3
/* RDBG takes one parenthesised printk argument list: RDBG(("fmt", args)). */
67 #define RDBG(x) printk x
/* RT6_TRACE places KERN_DEBUG directly in front of its first argument. */
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
69 #else
70 #define RDBG(x)
71 #define RT6_TRACE(x...) do { ; } while (0)
72 #endif
73
74
/*
 * Garbage-collection and MTU tunables.  The HZ-based values are in jiffies.
 */
/* Entry count above which ip6_dst_gc() ignores its rate limit and reports pressure. */
75 static int ip6_rt_max_size = 4096;
/* Minimum spacing between two GC runs in ip6_dst_gc(). */
76 static int ip6_rt_gc_min_interval = HZ / 2;
/* ip6_dst_gc() resets its expiry to half of this once entries drop below gc_thresh. */
77 static int ip6_rt_gc_timeout = 60*HZ;
/* Not static; not referenced in this part of the file. */
78 int ip6_rt_gc_interval = 30*HZ;
/* Shift used by ip6_dst_gc() to decay its expiry: expire -= expire >> elasticity. */
79 static int ip6_rt_gc_elasticity = 9;
/* Not referenced in this part of the file. */
80 static int ip6_rt_mtu_expires = 10*60*HZ;
/* Floor applied by ipv6_advmss(); presumably 20 and 40 are the TCP and IPv6 header sizes -- confirm. */
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82
/*
 * Forward declarations.  ip6_rt_copy() is used by rt6_cow(); ip6_dst_check()
 * through ip6_rt_update_pmtu() (except the two ip6_pkt_discard* handlers) are
 * installed as callbacks in ip6_dst_ops, and ip6_pkt_discard() /
 * ip6_pkt_discard_out() are the input/output handlers of ip6_null_entry.
 */
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void             ip6_dst_destroy(struct dst_entry *);
87 static void             ip6_dst_ifdown(struct dst_entry *,
88                                        struct net_device *dev, int how);
89 static int               ip6_dst_gc(void);
90
91 static int              ip6_pkt_discard(struct sk_buff *skb);
92 static int              ip6_pkt_discard_out(struct sk_buff *skb);
93 static void             ip6_link_failure(struct sk_buff *skb);
94 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
95
/*
 * Destination-cache operations for IPv6 routes.  Every rt6_info is allocated
 * through this table (see ip6_dst_alloc()), so entry_size is the size of
 * struct rt6_info.  The callbacks are the static functions declared above.
 */
96 static struct dst_ops ip6_dst_ops = {
97         .family                 =       AF_INET6,
98         .protocol               =       __constant_htons(ETH_P_IPV6),
99         .gc                     =       ip6_dst_gc,
        /* compared against the live entry count in ip6_dst_gc() */
100         .gc_thresh              =       1024,
101         .check                  =       ip6_dst_check,
102         .destroy                =       ip6_dst_destroy,
103         .ifdown                 =       ip6_dst_ifdown,
104         .negative_advice        =       ip6_negative_advice,
105         .link_failure           =       ip6_link_failure,
106         .update_pmtu            =       ip6_rt_update_pmtu,
107         .entry_size             =       sizeof(struct rt6_info),
108 };
109
/*
 * Statically allocated "no route" entry.  It is the initial leaf of
 * ip6_routing_table and is what the lookup helpers in this file return when
 * nothing matches.  It is a reject route (RTF_REJECT | RTF_NONEXTHOP) bound
 * to the loopback device: its input/output handlers are the packet-discard
 * functions and its error is -ENETUNREACH.
 */
110 struct rt6_info ip6_null_entry = {
111         .u = {
112                 .dst = {
                        /* starts with one reference */
113                         .__refcnt       = ATOMIC_INIT(1),
114                         .__use          = 1,
115                         .dev            = &loopback_dev,
116                         .obsolete       = -1,
117                         .error          = -ENETUNREACH,
118                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
119                         .input          = ip6_pkt_discard,
120                         .output         = ip6_pkt_discard_out,
121                         .ops            = &ip6_dst_ops,
                        /* path points back at this entry itself */
122                         .path           = (struct dst_entry*)&ip6_null_entry,
123                 }
124         },
125         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
        /* largest possible metric value */
126         .rt6i_metric    = ~(u32) 0,
127         .rt6i_ref       = ATOMIC_INIT(1),
128 };
129
/*
 * Root node of the IPv6 FIB tree.  It starts out with ip6_null_entry as its
 * only leaf; fib6_lookup() and fib6_add() in this file always start here.
 */
130 struct fib6_node ip6_routing_table = {
131         .leaf           = &ip6_null_entry,
132         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
133 };
134
135 /* Protects all the ip6 fib */
/*
 * In this file the lookup paths take it with read_lock_bh(); ip6_ins_rt()
 * and ndisc_dst_alloc() take it with write_lock_bh().
 */
136
137 DEFINE_RWLOCK(rt6_lock);
138
139
140 /* allocate dst with ip6_dst_ops */
141 static __inline__ struct rt6_info *ip6_dst_alloc(void)
142 {
143         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
144 }
145
146 static void ip6_dst_destroy(struct dst_entry *dst)
147 {
148         struct rt6_info *rt = (struct rt6_info *)dst;
149         struct inet6_dev *idev = rt->rt6i_idev;
150
151         if (idev != NULL) {
152                 rt->rt6i_idev = NULL;
153                 in6_dev_put(idev);
154         }       
155 }
156
157 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
158                            int how)
159 {
160         struct rt6_info *rt = (struct rt6_info *)dst;
161         struct inet6_dev *idev = rt->rt6i_idev;
162
163         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
164                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
165                 if (loopback_idev != NULL) {
166                         rt->rt6i_idev = loopback_idev;
167                         in6_dev_put(idev);
168                 }
169         }
170 }
171
172 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
173 {
174         return (rt->rt6i_flags & RTF_EXPIRES &&
175                 time_after(jiffies, rt->rt6i_expires));
176 }
177
178 /*
179  *      Route lookup. The caller is expected to hold rt6_lock (read or write).
180  */
181
182 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
183                                                     int oif,
184                                                     int strict)
185 {
186         struct rt6_info *local = NULL;
187         struct rt6_info *sprt;
188
189         if (oif) {
190                 for (sprt = rt; sprt; sprt = sprt->u.next) {
191                         struct net_device *dev = sprt->rt6i_dev;
192                         if (dev->ifindex == oif)
193                                 return sprt;
194                         if (dev->flags & IFF_LOOPBACK) {
195                                 if (sprt->rt6i_idev == NULL ||
196                                     sprt->rt6i_idev->dev->ifindex != oif) {
197                                         if (strict && oif)
198                                                 continue;
199                                         if (local && (!oif || 
200                                                       local->rt6i_idev->dev->ifindex == oif))
201                                                 continue;
202                                 }
203                                 local = sprt;
204                         }
205                 }
206
207                 if (local)
208                         return local;
209
210                 if (strict)
211                         return &ip6_null_entry;
212         }
213         return rt;
214 }
215
216 /*
217  *      pointer to the last default router chosen. BH is disabled locally.
218  */
/*
 * rt6_dflt_lock guards updates of rt6_dflt_pointer: rt6_reset_dflt_pointer()
 * takes it with spin_lock_bh(), rt6_best_dflt() with plain spin_lock().
 */
219 static struct rt6_info *rt6_dflt_pointer;
220 static DEFINE_SPINLOCK(rt6_dflt_lock);
221
222 void rt6_reset_dflt_pointer(struct rt6_info *rt)
223 {
224         spin_lock_bh(&rt6_dflt_lock);
225         if (rt == NULL || rt == rt6_dflt_pointer) {
226                 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
227                 rt6_dflt_pointer = NULL;
228         }
229         spin_unlock_bh(&rt6_dflt_lock);
230 }
231
232 /* Default Router Selection (RFC 2461 6.3.6) */
/*
 * Pick the default route to use from the list headed by @rt.
 *
 * @rt:  head of a list of default routes linked through u.next
 * @oif: required output interface index, or 0 for "any"
 *
 * Each non-expired candidate that has a next-hop neighbour gets a score:
 *   +8  no @oif given, or the route's device has ifindex @oif
 *   +4  it is the router remembered in rt6_dflt_pointer
 *   +3  neighbour is NUD_REACHABLE
 *   +2  neighbour is NUD_STALE, NUD_DELAY or NUD_PROBE
 *   +1  neighbour is NUD_NOARP or NUD_PERMANENT
 * Candidates without a neighbour, or whose neighbour is in any other state
 * (including NUD_INCOMPLETE), are skipped.  The highest score wins; a score
 * of 12 or more (only possible with both the +8 and +4 bonuses) ends the
 * scan immediately.
 *
 * If nothing scored and a router is remembered, the list is walked
 * round-robin starting after the remembered router.  A winner from either
 * stage is recorded in rt6_dflt_pointer.  Failing that, the leaf list of
 * ip6_routing_table is searched for a usable RTF_DEFAULT route (not
 * recorded), and finally &ip6_null_entry is returned.
 *
 * No reference is taken on the returned route.
 */
233 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
234 {
235         struct rt6_info *match = NULL;
236         struct rt6_info *sprt;
237         int mpri = 0;
238
239         for (sprt = rt; sprt; sprt = sprt->u.next) {
240                 struct neighbour *neigh;
241                 int m = 0;
242
243                 if (!oif ||
244                     (sprt->rt6i_dev &&
245                      sprt->rt6i_dev->ifindex == oif))
246                         m += 8;
247
248                 if (rt6_check_expired(sprt))
249                         continue;
250
                /* rt6_dflt_pointer is read here without rt6_dflt_lock held */
251                 if (sprt == rt6_dflt_pointer)
252                         m += 4;
253
254                 if ((neigh = sprt->rt6i_nexthop) != NULL) {
                        /* nud_state is sampled under the neighbour's own lock */
255                         read_lock_bh(&neigh->lock);
256                         switch (neigh->nud_state) {
257                         case NUD_REACHABLE:
258                                 m += 3;
259                                 break;
260
261                         case NUD_STALE:
262                         case NUD_DELAY:
263                         case NUD_PROBE:
264                                 m += 2;
265                                 break;
266
267                         case NUD_NOARP:
268                         case NUD_PERMANENT:
269                                 m += 1;
270                                 break;
271
272                         case NUD_INCOMPLETE:
273                         default:
                                /* unusable neighbour: unlock and skip this route */
274                                 read_unlock_bh(&neigh->lock);
275                                 continue;
276                         }
277                         read_unlock_bh(&neigh->lock);
278                 } else {
279                         continue;
280                 }
281
282                 if (m > mpri || m >= 12) {
283                         match = sprt;
284                         mpri = m;
285                         if (m >= 12) {
286                                 /* we choose the last default router if it
287                                  * is in (probably) reachable state.
288                                  * If route changed, we should do pmtu
289                                  * discovery. --yoshfuji
290                                  */
291                                 break;
292                         }
293                 }
294         }
295
        /* plain spin_lock: see the "BH is disabled locally" note on rt6_dflt_pointer */
296         spin_lock(&rt6_dflt_lock);
297         if (!match) {
298                 /*
299                  *      No default routers are known to be reachable.
300                  *      SHOULD round robin
301                  */
302                 if (rt6_dflt_pointer) {
                        /* first pass: entries after the remembered router */
303                         for (sprt = rt6_dflt_pointer->u.next;
304                              sprt; sprt = sprt->u.next) {
305                                 if (sprt->u.dst.obsolete <= 0 &&
306                                     sprt->u.dst.error == 0 &&
307                                     !rt6_check_expired(sprt)) {
308                                         match = sprt;
309                                         break;
310                                 }
311                         }
                        /* second pass: wrap around from the head, up to and
                         * including the remembered router itself */
312                         for (sprt = rt;
313                              !match && sprt;
314                              sprt = sprt->u.next) {
315                                 if (sprt->u.dst.obsolete <= 0 &&
316                                     sprt->u.dst.error == 0 &&
317                                     !rt6_check_expired(sprt)) {
318                                         match = sprt;
319                                         break;
320                                 }
321                                 if (sprt == rt6_dflt_pointer)
322                                         break;
323                         }
324                 }
325         }
326
327         if (match) {
328                 if (rt6_dflt_pointer != match)
329                         RT6_TRACE("changed default router: %p->%p\n",
330                                   rt6_dflt_pointer, match);
331                 rt6_dflt_pointer = match;
332         }
333         spin_unlock(&rt6_dflt_lock);
334
335         if (!match) {
336                 /*
337                  * Last Resort: if no default routers found, 
338                  * use addrconf default route.
339                  * We don't record this route.
340                  */
341                 for (sprt = ip6_routing_table.leaf;
342                      sprt; sprt = sprt->u.next) {
343                         if (!rt6_check_expired(sprt) &&
344                             (sprt->rt6i_flags & RTF_DEFAULT) &&
345                             (!oif ||
346                              (sprt->rt6i_dev &&
347                               sprt->rt6i_dev->ifindex == oif))) {
348                                 match = sprt;
349                                 break;
350                         }
351                 }
352                 if (!match) {
353                         /* no default route.  give up. */
354                         match = &ip6_null_entry;
355                 }
356         }
357
358         return match;
359 }
360
361 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
362                             int oif, int strict)
363 {
364         struct fib6_node *fn;
365         struct rt6_info *rt;
366
367         read_lock_bh(&rt6_lock);
368         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
369         rt = rt6_device_match(fn->leaf, oif, strict);
370         dst_hold(&rt->u.dst);
371         rt->u.dst.__use++;
372         read_unlock_bh(&rt6_lock);
373
374         rt->u.dst.lastuse = jiffies;
375         if (rt->u.dst.error == 0)
376                 return rt;
377         dst_release(&rt->u.dst);
378         return NULL;
379 }
380
381 /* ip6_ins_rt is called with FREE rt6_lock.
382    It takes new route entry, the addition fails by any reason the
383    route is freed. In any case, if caller does not hold it, it may
384    be destroyed.
385  */
386
387 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
388 {
389         int err;
390
391         write_lock_bh(&rt6_lock);
392         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
393         write_unlock_bh(&rt6_lock);
394
395         return err;
396 }
397
398 /* No rt6_lock! If COW failed, the function returns dead route entry
399    with dst->error set to errno value.
400  */
401
402 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
403                                 struct in6_addr *saddr)
404 {
405         int err;
406         struct rt6_info *rt;
407
408         /*
409          *      Clone the route.
410          */
411
412         rt = ip6_rt_copy(ort);
413
414         if (rt) {
415                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
416
417                 if (!(rt->rt6i_flags&RTF_GATEWAY))
418                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
419
420                 rt->rt6i_dst.plen = 128;
421                 rt->rt6i_flags |= RTF_CACHE;
422                 rt->u.dst.flags |= DST_HOST;
423
424 #ifdef CONFIG_IPV6_SUBTREES
425                 if (rt->rt6i_src.plen && saddr) {
426                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
427                         rt->rt6i_src.plen = 128;
428                 }
429 #endif
430
431                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
432
433                 dst_hold(&rt->u.dst);
434
435                 err = ip6_ins_rt(rt, NULL, NULL);
436                 if (err == 0)
437                         return rt;
438
439                 rt->u.dst.error = err;
440
441                 return rt;
442         }
443         dst_hold(&ip6_null_entry.u.dst);
444         return &ip6_null_entry;
445 }
446
/*
 * BACKTRACK() - retry a strict lookup higher up the FIB tree.
 *
 * Expects the enclosing function to provide the variables rt, fn and
 * strict and the labels "restart" and "out".
 *
 * Acts only when the current result is &ip6_null_entry and strict is set:
 * it then walks fn up through its parents.  On reaching a node flagged
 * RTN_ROOT it takes a reference on rt and jumps to "out"; on reaching a
 * node flagged RTN_RTINFO it jumps to "restart" with fn pointing at that
 * node.  If the parent chain ends first, execution simply continues after
 * the macro (with fn == NULL).
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is exactly one
 * statement and is safe inside an unbraced if/else.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
458
459
/*
 * Route an incoming packet: find the route for the packet's IPv6
 * destination/source addresses and attach it, with a reference held, to
 * skb->dst.
 *
 * The receiving interface (skb->dev->ifindex) is used as the interface
 * constraint.  "strict" is non-zero when the destination is a multicast or
 * link-local address.
 *
 * The function always sets skb->dst; on failure this is &ip6_null_entry or
 * a clone whose dst.error is set (see rt6_cow()).
 */
460 void ip6_route_input(struct sk_buff *skb)
461 {
462         struct fib6_node *fn;
463         struct rt6_info *rt;
464         int strict;
        /* bound on how often the lookup is repeated after an -EEXIST race */
465         int attempts = 3;
466
467         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
468
469 relookup:
470         read_lock_bh(&rt6_lock);
471
472         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
473                          &skb->nh.ipv6h->saddr);
474
        /* BACKTRACK() jumps back here with fn moved to an ancestor node */
475 restart:
476         rt = fn->leaf;
477
        /* leaf list starts with a cache entry: device match, take a ref, done */
478         if ((rt->rt6i_flags & RTF_CACHE)) {
479                 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
480                 BACKTRACK();
481                 dst_hold(&rt->u.dst);
482                 goto out;
483         }
484
        /* note: device match is non-strict here (0), but BACKTRACK() still tests strict */
485         rt = rt6_device_match(rt, skb->dev->ifindex, 0);
486         BACKTRACK();
487
        /* route needs a next hop but has none: clone a host route for this destination */
488         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
489                 struct rt6_info *nrt;
                /* pin rt across the unlocked section; rt6_cow() must run without rt6_lock */
490                 dst_hold(&rt->u.dst);
491                 read_unlock_bh(&rt6_lock);
492
493                 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
494                               &skb->nh.ipv6h->saddr);
495
496                 dst_release(&rt->u.dst);
497                 rt = nrt;
498
                /* anything but -EEXIST (including success), or retries exhausted: use nrt as is */
499                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
500                         goto out2;
501
502                 /* Race condition! In the gap, when rt6_lock was
503                    released someone could insert this route.  Relookup.
504                 */
505                 dst_release(&rt->u.dst);
506                 goto relookup;
507         }
508         dst_hold(&rt->u.dst);
509
        /* reached with rt6_lock held for reading and a reference on rt */
510 out:
511         read_unlock_bh(&rt6_lock);
        /* reached without rt6_lock; rt carries the reference taken in rt6_cow() */
512 out2:
513         rt->u.dst.lastuse = jiffies;
514         rt->u.dst.__use++;
515         skb->dst = (struct dst_entry *) rt;
516 }
517
/*
 * Find the route for an outgoing flow.
 *
 * @sk: not used in this function
 * @fl: flow description; fl6_dst / fl6_src select the FIB node and oif is
 *      the interface constraint
 *
 * "strict" is non-zero when the destination is a multicast or link-local
 * address.  Returns the dst of the selected route with a reference held.
 * The result is never NULL; on failure it is the dst of &ip6_null_entry or
 * of a clone whose error field is set (see rt6_cow()).
 */
518 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
519 {
520         struct fib6_node *fn;
521         struct rt6_info *rt;
522         int strict;
        /* bound on how often the lookup is repeated after an -EEXIST race */
523         int attempts = 3;
524
525         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
526
527 relookup:
528         read_lock_bh(&rt6_lock);
529
530         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
531
        /* BACKTRACK() jumps back here with fn moved to an ancestor node */
532 restart:
533         rt = fn->leaf;
534
        /* leaf list starts with a cache entry: device match, take a ref, done */
535         if ((rt->rt6i_flags & RTF_CACHE)) {
536                 rt = rt6_device_match(rt, fl->oif, strict);
537                 BACKTRACK();
538                 dst_hold(&rt->u.dst);
539                 goto out;
540         }
        /* default routes with metric >= IP6_RT_PRIO_ADDRCONF go through
         * default-router selection; other default routes are used as found */
541         if (rt->rt6i_flags & RTF_DEFAULT) {
542                 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
543                         rt = rt6_best_dflt(rt, fl->oif);
544         } else {
545                 rt = rt6_device_match(rt, fl->oif, strict);
546                 BACKTRACK();
547         }
548
        /* route needs a next hop but has none: clone a host route for this destination */
549         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
550                 struct rt6_info *nrt;
                /* pin rt across the unlocked section; rt6_cow() must run without rt6_lock */
551                 dst_hold(&rt->u.dst);
552                 read_unlock_bh(&rt6_lock);
553
554                 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
555
556                 dst_release(&rt->u.dst);
557                 rt = nrt;
558
                /* anything but -EEXIST (including success), or retries exhausted: use nrt as is */
559                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
560                         goto out2;
561
562                 /* Race condition! In the gap, when rt6_lock was
563                    released someone could insert this route.  Relookup.
564                 */
565                 dst_release(&rt->u.dst);
566                 goto relookup;
567         }
568         dst_hold(&rt->u.dst);
569
        /* reached with rt6_lock held for reading and a reference on rt */
570 out:
571         read_unlock_bh(&rt6_lock);
        /* reached without rt6_lock; rt carries the reference taken in rt6_cow() */
572 out2:
573         rt->u.dst.lastuse = jiffies;
574         rt->u.dst.__use++;
575         return &rt->u.dst;
576 }
577
578
579 /*
580  *      Destination cache support functions
581  */
582
583 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
584 {
585         struct rt6_info *rt;
586
587         rt = (struct rt6_info *) dst;
588
589         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
590                 return dst;
591
592         dst_release(dst);
593         return NULL;
594 }
595
596 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
597 {
598         struct rt6_info *rt = (struct rt6_info *) dst;
599
600         if (rt) {
601                 if (rt->rt6i_flags & RTF_CACHE)
602                         ip6_del_rt(rt, NULL, NULL);
603                 else
604                         dst_release(dst);
605         }
606         return NULL;
607 }
608
609 static void ip6_link_failure(struct sk_buff *skb)
610 {
611         struct rt6_info *rt;
612
613         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
614
615         rt = (struct rt6_info *) skb->dst;
616         if (rt) {
617                 if (rt->rt6i_flags&RTF_CACHE) {
618                         dst_set_expires(&rt->u.dst, 0);
619                         rt->rt6i_flags |= RTF_EXPIRES;
620                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
621                         rt->rt6i_node->fn_sernum = -1;
622         }
623 }
624
625 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
626 {
627         struct rt6_info *rt6 = (struct rt6_info*)dst;
628
629         if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
630                 rt6->rt6i_flags |= RTF_MODIFIED;
631                 if (mtu < IPV6_MIN_MTU)
632                         mtu = IPV6_MIN_MTU;
633                 dst->metrics[RTAX_MTU-1] = mtu;
634         }
635 }
636
637 /* Protected by rt6_lock.  */
/*
 * List (linked through dst->next) of the dsts created by ndisc_dst_alloc();
 * ndisc_dst_gc() unlinks and frees the entries whose refcount is zero.
 */
638 static struct dst_entry *ndisc_dst_gc_list;
/* Forward declaration; the definition follows further down in this file. */
639 static int ipv6_get_mtu(struct net_device *dev);
640
641 static inline unsigned int ipv6_advmss(unsigned int mtu)
642 {
643         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
644
645         if (mtu < ip6_rt_min_advmss)
646                 mtu = ip6_rt_min_advmss;
647
648         /*
649          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
650          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
651          * IPV6_MAXPLEN is also valid and means: "any MSS, 
652          * rely only on pmtu discovery"
653          */
654         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
655                 mtu = IPV6_MAXPLEN;
656         return mtu;
657 }
658
659 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
660                                   struct neighbour *neigh,
661                                   struct in6_addr *addr,
662                                   int (*output)(struct sk_buff *))
663 {
664         struct rt6_info *rt;
665         struct inet6_dev *idev = in6_dev_get(dev);
666
667         if (unlikely(idev == NULL))
668                 return NULL;
669
670         rt = ip6_dst_alloc();
671         if (unlikely(rt == NULL)) {
672                 in6_dev_put(idev);
673                 goto out;
674         }
675
676         dev_hold(dev);
677         if (neigh)
678                 neigh_hold(neigh);
679         else
680                 neigh = ndisc_get_neigh(dev, addr);
681
682         rt->rt6i_dev      = dev;
683         rt->rt6i_idev     = idev;
684         rt->rt6i_nexthop  = neigh;
685         atomic_set(&rt->u.dst.__refcnt, 1);
686         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
687         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
688         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
689         rt->u.dst.output  = output;
690
691 #if 0   /* there's no chance to use these for ndisc */
692         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
693                                 ? DST_HOST 
694                                 : 0;
695         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
696         rt->rt6i_dst.plen = 128;
697 #endif
698
699         write_lock_bh(&rt6_lock);
700         rt->u.dst.next = ndisc_dst_gc_list;
701         ndisc_dst_gc_list = &rt->u.dst;
702         write_unlock_bh(&rt6_lock);
703
704         fib6_force_start_gc();
705
706 out:
707         return (struct dst_entry *)rt;
708 }
709
710 int ndisc_dst_gc(int *more)
711 {
712         struct dst_entry *dst, *next, **pprev;
713         int freed;
714
715         next = NULL;
716         pprev = &ndisc_dst_gc_list;
717         freed = 0;
718         while ((dst = *pprev) != NULL) {
719                 if (!atomic_read(&dst->__refcnt)) {
720                         *pprev = dst->next;
721                         dst_free(dst);
722                         freed++;
723                 } else {
724                         pprev = &dst->next;
725                         (*more)++;
726                 }
727         }
728
729         return freed;
730 }
731
732 static int ip6_dst_gc(void)
733 {
734         static unsigned expire = 30*HZ;
735         static unsigned long last_gc;
736         unsigned long now = jiffies;
737
738         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
739             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
740                 goto out;
741
742         expire++;
743         fib6_run_gc(expire);
744         last_gc = now;
745         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
746                 expire = ip6_rt_gc_timeout>>1;
747
748 out:
749         expire -= expire>>ip6_rt_gc_elasticity;
750         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
751 }
752
753 /* Clean host part of a prefix. Not necessary in radix tree,
754    but results in cleaner routing tables.
755
756    Remove it only when all the things will work!
757  */
758
759 static int ipv6_get_mtu(struct net_device *dev)
760 {
761         int mtu = IPV6_MIN_MTU;
762         struct inet6_dev *idev;
763
764         idev = in6_dev_get(dev);
765         if (idev) {
766                 mtu = idev->cnf.mtu6;
767                 in6_dev_put(idev);
768         }
769         return mtu;
770 }
771
772 static int ipv6_get_hoplimit(struct net_device *dev)
773 {
774         int hoplimit = ipv6_devconf.hop_limit;
775         struct inet6_dev *idev;
776
777         idev = in6_dev_get(dev);
778         if (idev) {
779                 hoplimit = idev->cnf.hop_limit;
780                 in6_dev_put(idev);
781         }
782         return hoplimit;
783 }
784
785 /*
786  *
787  */
788
789 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
790 {
791         int err;
792         struct rtmsg *r;
793         struct rtattr **rta;
794         struct rt6_info *rt = NULL;
795         struct net_device *dev = NULL;
796         struct inet6_dev *idev = NULL;
797         int addr_type;
798
799         rta = (struct rtattr **) _rtattr;
800
801         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
802                 return -EINVAL;
803 #ifndef CONFIG_IPV6_SUBTREES
804         if (rtmsg->rtmsg_src_len)
805                 return -EINVAL;
806 #endif
807         if (rtmsg->rtmsg_ifindex) {
808                 err = -ENODEV;
809                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
810                 if (!dev)
811                         goto out;
812                 idev = in6_dev_get(dev);
813                 if (!idev)
814                         goto out;
815         }
816
817         if (rtmsg->rtmsg_metric == 0)
818                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
819
820         rt = ip6_dst_alloc();
821
822         if (rt == NULL) {
823                 err = -ENOMEM;
824                 goto out;
825         }
826
827         rt->u.dst.obsolete = -1;
828         rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
829         if (nlh && (r = NLMSG_DATA(nlh))) {
830                 rt->rt6i_protocol = r->rtm_protocol;
831         } else {
832                 rt->rt6i_protocol = RTPROT_BOOT;
833         }
834
835         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
836
837         if (addr_type & IPV6_ADDR_MULTICAST)
838                 rt->u.dst.input = ip6_mc_input;
839         else
840                 rt->u.dst.input = ip6_forward;
841
842         rt->u.dst.output = ip6_output;
843
844         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
845                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
846         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
847         if (rt->rt6i_dst.plen == 128)
848                rt->u.dst.flags = DST_HOST;
849
850 #ifdef CONFIG_IPV6_SUBTREES
851         ipv6_addr_prefix(&rt->rt6i_src.addr, 
852                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
853         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
854 #endif
855
856         rt->rt6i_metric = rtmsg->rtmsg_metric;
857
858         /* We cannot add true routes via loopback here,
859            they would result in kernel looping; promote them to reject routes
860          */
861         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
862             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
863                 /* hold loopback dev/idev if we haven't done so. */
864                 if (dev != &loopback_dev) {
865                         if (dev) {
866                                 dev_put(dev);
867                                 in6_dev_put(idev);
868                         }
869                         dev = &loopback_dev;
870                         dev_hold(dev);
871                         idev = in6_dev_get(dev);
872                         if (!idev) {
873                                 err = -ENODEV;
874                                 goto out;
875                         }
876                 }
877                 rt->u.dst.output = ip6_pkt_discard_out;
878                 rt->u.dst.input = ip6_pkt_discard;
879                 rt->u.dst.error = -ENETUNREACH;
880                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
881                 goto install_route;
882         }
883
884         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
885                 struct in6_addr *gw_addr;
886                 int gwa_type;
887
888                 gw_addr = &rtmsg->rtmsg_gateway;
889                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
890                 gwa_type = ipv6_addr_type(gw_addr);
891
892                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
893                         struct rt6_info *grt;
894
895                         /* IPv6 strictly inhibits using not link-local
896                            addresses as nexthop address.
897                            Otherwise, router will not able to send redirects.
898                            It is very good, but in some (rare!) circumstances
899                            (SIT, PtP, NBMA NOARP links) it is handy to allow
900                            some exceptions. --ANK
901                          */
902                         err = -EINVAL;
903                         if (!(gwa_type&IPV6_ADDR_UNICAST))
904                                 goto out;
905
906                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
907
908                         err = -EHOSTUNREACH;
909                         if (grt == NULL)
910                                 goto out;
911                         if (dev) {
912                                 if (dev != grt->rt6i_dev) {
913                                         dst_release(&grt->u.dst);
914                                         goto out;
915                                 }
916                         } else {
917                                 dev = grt->rt6i_dev;
918                                 idev = grt->rt6i_idev;
919                                 dev_hold(dev);
920                                 in6_dev_hold(grt->rt6i_idev);
921                         }
922                         if (!(grt->rt6i_flags&RTF_GATEWAY))
923                                 err = 0;
924                         dst_release(&grt->u.dst);
925
926                         if (err)
927                                 goto out;
928                 }
929                 err = -EINVAL;
930                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
931                         goto out;
932         }
933
934         err = -ENODEV;
935         if (dev == NULL)
936                 goto out;
937
938         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
939                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
940                 if (IS_ERR(rt->rt6i_nexthop)) {
941                         err = PTR_ERR(rt->rt6i_nexthop);
942                         rt->rt6i_nexthop = NULL;
943                         goto out;
944                 }
945         }
946
947         rt->rt6i_flags = rtmsg->rtmsg_flags;
948
949 install_route:
950         if (rta && rta[RTA_METRICS-1]) {
951                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
952                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
953
954                 while (RTA_OK(attr, attrlen)) {
955                         unsigned flavor = attr->rta_type;
956                         if (flavor) {
957                                 if (flavor > RTAX_MAX) {
958                                         err = -EINVAL;
959                                         goto out;
960                                 }
961                                 rt->u.dst.metrics[flavor-1] =
962                                         *(u32 *)RTA_DATA(attr);
963                         }
964                         attr = RTA_NEXT(attr, attrlen);
965                 }
966         }
967
968         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
969                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
970                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
971                                 IPV6_DEFAULT_MCASTHOPS;
972                 else
973                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
974                                 ipv6_get_hoplimit(dev);
975         }
976
977         if (!rt->u.dst.metrics[RTAX_MTU-1])
978                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
979         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
980                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
981         rt->u.dst.dev = dev;
982         rt->rt6i_idev = idev;
983         return ip6_ins_rt(rt, nlh, _rtattr);
984
985 out:
986         if (dev)
987                 dev_put(dev);
988         if (idev)
989                 in6_dev_put(idev);
990         if (rt)
991                 dst_free((struct dst_entry *) rt);
992         return err;
993 }
994
995 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
996 {
997         int err;
998
999         write_lock_bh(&rt6_lock);
1000
1001         rt6_reset_dflt_pointer(NULL);
1002
1003         err = fib6_del(rt, nlh, _rtattr);
1004         dst_release(&rt->u.dst);
1005
1006         write_unlock_bh(&rt6_lock);
1007
1008         return err;
1009 }
1010
1011 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
1012 {
1013         struct fib6_node *fn;
1014         struct rt6_info *rt;
1015         int err = -ESRCH;
1016
1017         read_lock_bh(&rt6_lock);
1018
1019         fn = fib6_locate(&ip6_routing_table,
1020                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1021                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1022         
1023         if (fn) {
1024                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1025                         if (rtmsg->rtmsg_ifindex &&
1026                             (rt->rt6i_dev == NULL ||
1027                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1028                                 continue;
1029                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1030                             !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1031                                 continue;
1032                         if (rtmsg->rtmsg_metric &&
1033                             rtmsg->rtmsg_metric != rt->rt6i_metric)
1034                                 continue;
1035                         dst_hold(&rt->u.dst);
1036                         read_unlock_bh(&rt6_lock);
1037
1038                         return ip6_del_rt(rt, nlh, _rtattr);
1039                 }
1040         }
1041         read_unlock_bh(&rt6_lock);
1042
1043         return err;
1044 }
1045
1046 /*
1047  *      Handle redirects
1048  */
1049 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1050                   struct neighbour *neigh, u8 *lladdr, int on_link)
1051 {
1052         struct rt6_info *rt, *nrt;
1053
1054         /* Locate old route to this destination. */
1055         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1056
1057         if (rt == NULL)
1058                 return;
1059
1060         if (neigh->dev != rt->rt6i_dev)
1061                 goto out;
1062
1063         /*
1064          * Current route is on-link; redirect is always invalid.
1065          * 
1066          * Seems, previous statement is not true. It could
1067          * be node, which looks for us as on-link (f.e. proxy ndisc)
1068          * But then router serving it might decide, that we should
1069          * know truth 8)8) --ANK (980726).
1070          */
1071         if (!(rt->rt6i_flags&RTF_GATEWAY))
1072                 goto out;
1073
1074         /*
1075          *      RFC 2461 specifies that redirects should only be
1076          *      accepted if they come from the nexthop to the target.
1077          *      Due to the way default routers are chosen, this notion
1078          *      is a bit fuzzy and one might need to check all default
1079          *      routers.
1080          */
1081         if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1082                 if (rt->rt6i_flags & RTF_DEFAULT) {
1083                         struct rt6_info *rt1;
1084
1085                         read_lock(&rt6_lock);
1086                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1087                                 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1088                                         dst_hold(&rt1->u.dst);
1089                                         dst_release(&rt->u.dst);
1090                                         read_unlock(&rt6_lock);
1091                                         rt = rt1;
1092                                         goto source_ok;
1093                                 }
1094                         }
1095                         read_unlock(&rt6_lock);
1096                 }
1097                 if (net_ratelimit())
1098                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1099                                "for redirect target\n");
1100                 goto out;
1101         }
1102
1103 source_ok:
1104
1105         /*
1106          *      We have finally decided to accept it.
1107          */
1108
1109         neigh_update(neigh, lladdr, NUD_STALE, 
1110                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1111                      NEIGH_UPDATE_F_OVERRIDE|
1112                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1113                                      NEIGH_UPDATE_F_ISROUTER))
1114                      );
1115
1116         /*
1117          * Redirect received -> path was valid.
1118          * Look, redirects are sent only in response to data packets,
1119          * so that this nexthop apparently is reachable. --ANK
1120          */
1121         dst_confirm(&rt->u.dst);
1122
1123         /* Duplicate redirect: silently ignore. */
1124         if (neigh == rt->u.dst.neighbour)
1125                 goto out;
1126
1127         nrt = ip6_rt_copy(rt);
1128         if (nrt == NULL)
1129                 goto out;
1130
1131         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1132         if (on_link)
1133                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1134
1135         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1136         nrt->rt6i_dst.plen = 128;
1137         nrt->u.dst.flags |= DST_HOST;
1138
1139         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1140         nrt->rt6i_nexthop = neigh_clone(neigh);
1141         /* Reset pmtu, it may be better */
1142         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1143         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
1144
1145         if (ip6_ins_rt(nrt, NULL, NULL))
1146                 goto out;
1147
1148         if (rt->rt6i_flags&RTF_CACHE) {
1149                 ip6_del_rt(rt, NULL, NULL);
1150                 return;
1151         }
1152
1153 out:
1154         dst_release(&rt->u.dst);
1155         return;
1156 }
1157
1158 /*
1159  *      Handle ICMP "packet too big" messages
1160  *      i.e. Path MTU discovery
1161  */
1162
1163 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1164                         struct net_device *dev, u32 pmtu)
1165 {
1166         struct rt6_info *rt, *nrt;
1167
1168         if (pmtu < IPV6_MIN_MTU) {
1169                 if (net_ratelimit())
1170                         printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1171                                pmtu);
1172                 /* According to RFC1981, the PMTU is set to the IPv6 minimum
1173                    link MTU if the node receives a Packet Too Big message
1174                    reporting next-hop MTU that is less than the IPv6 minimum MTU.
1175                    */
1176                 pmtu = IPV6_MIN_MTU;
1177         }
1178
1179         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1180
1181         if (rt == NULL)
1182                 return;
1183
1184         if (pmtu >= dst_pmtu(&rt->u.dst))
1185                 goto out;
1186
1187         /* New mtu received -> path was valid.
1188            They are sent only in response to data packets,
1189            so that this nexthop apparently is reachable. --ANK
1190          */
1191         dst_confirm(&rt->u.dst);
1192
1193         /* Host route. If it is static, it would be better
1194            not to override it, but add new one, so that
1195            when cache entry will expire old pmtu
1196            would return automatically.
1197          */
1198         if (rt->rt6i_flags & RTF_CACHE) {
1199                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1200                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1201                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1202                 goto out;
1203         }
1204
1205         /* Network route.
1206            Two cases are possible:
1207            1. It is connected route. Action: COW
1208            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1209          */
1210         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1211                 nrt = rt6_cow(rt, daddr, saddr);
1212                 if (!nrt->u.dst.error) {
1213                         nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1214                         /* According to RFC 1981, detecting PMTU increase shouldn't be
1215                            happened within 5 mins, the recommended timer is 10 mins.
1216                            Here this route expiration time is set to ip6_rt_mtu_expires
1217                            which is 10 mins. After 10 mins the decreased pmtu is expired
1218                            and detecting PMTU increase will be automatically happened.
1219                          */
1220                         dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1221                         nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1222                 }
1223                 dst_release(&nrt->u.dst);
1224         } else {
1225                 nrt = ip6_rt_copy(rt);
1226                 if (nrt == NULL)
1227                         goto out;
1228                 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1229                 nrt->rt6i_dst.plen = 128;
1230                 nrt->u.dst.flags |= DST_HOST;
1231                 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1232                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1233                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1234                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1235                 ip6_ins_rt(nrt, NULL, NULL);
1236         }
1237
1238 out:
1239         dst_release(&rt->u.dst);
1240 }
1241
1242 /*
1243  *      Misc support functions
1244  */
1245
1246 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1247 {
1248         struct rt6_info *rt = ip6_dst_alloc();
1249
1250         if (rt) {
1251                 rt->u.dst.input = ort->u.dst.input;
1252                 rt->u.dst.output = ort->u.dst.output;
1253
1254                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1255                 rt->u.dst.dev = ort->u.dst.dev;
1256                 if (rt->u.dst.dev)
1257                         dev_hold(rt->u.dst.dev);
1258                 rt->rt6i_idev = ort->rt6i_idev;
1259                 if (rt->rt6i_idev)
1260                         in6_dev_hold(rt->rt6i_idev);
1261                 rt->u.dst.lastuse = jiffies;
1262                 rt->rt6i_expires = 0;
1263
1264                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1265                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1266                 rt->rt6i_metric = 0;
1267
1268                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1269 #ifdef CONFIG_IPV6_SUBTREES
1270                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1271 #endif
1272         }
1273         return rt;
1274 }
1275
1276 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1277 {       
1278         struct rt6_info *rt;
1279         struct fib6_node *fn;
1280
1281         fn = &ip6_routing_table;
1282
1283         write_lock_bh(&rt6_lock);
1284         for (rt = fn->leaf; rt; rt=rt->u.next) {
1285                 if (dev == rt->rt6i_dev &&
1286                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1287                         break;
1288         }
1289         if (rt)
1290                 dst_hold(&rt->u.dst);
1291         write_unlock_bh(&rt6_lock);
1292         return rt;
1293 }
1294
1295 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1296                                      struct net_device *dev)
1297 {
1298         struct in6_rtmsg rtmsg;
1299
1300         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1301         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1302         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1303         rtmsg.rtmsg_metric = 1024;
1304         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1305
1306         rtmsg.rtmsg_ifindex = dev->ifindex;
1307
1308         ip6_route_add(&rtmsg, NULL, NULL);
1309         return rt6_get_dflt_router(gwaddr, dev);
1310 }
1311
1312 void rt6_purge_dflt_routers(void)
1313 {
1314         struct rt6_info *rt;
1315
1316 restart:
1317         read_lock_bh(&rt6_lock);
1318         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1319                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1320                         dst_hold(&rt->u.dst);
1321
1322                         rt6_reset_dflt_pointer(NULL);
1323
1324                         read_unlock_bh(&rt6_lock);
1325
1326                         ip6_del_rt(rt, NULL, NULL);
1327
1328                         goto restart;
1329                 }
1330         }
1331         read_unlock_bh(&rt6_lock);
1332 }
1333
1334 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1335 {
1336         struct in6_rtmsg rtmsg;
1337         int err;
1338
1339         switch(cmd) {
1340         case SIOCADDRT:         /* Add a route */
1341         case SIOCDELRT:         /* Delete a route */
1342                 if (!capable(CAP_NET_ADMIN))
1343                         return -EPERM;
1344                 err = copy_from_user(&rtmsg, arg,
1345                                      sizeof(struct in6_rtmsg));
1346                 if (err)
1347                         return -EFAULT;
1348                         
1349                 rtnl_lock();
1350                 switch (cmd) {
1351                 case SIOCADDRT:
1352                         err = ip6_route_add(&rtmsg, NULL, NULL);
1353                         break;
1354                 case SIOCDELRT:
1355                         err = ip6_route_del(&rtmsg, NULL, NULL);
1356                         break;
1357                 default:
1358                         err = -EINVAL;
1359                 }
1360                 rtnl_unlock();
1361
1362                 return err;
1363         };
1364
1365         return -EINVAL;
1366 }
1367
1368 /*
1369  *      Drop the packet on the floor
1370  */
1371
1372 int ip6_pkt_discard(struct sk_buff *skb)
1373 {
1374         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1375         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1376         kfree_skb(skb);
1377         return 0;
1378 }
1379
1380 int ip6_pkt_discard_out(struct sk_buff *skb)
1381 {
1382         skb->dev = skb->dst->dev;
1383         return ip6_pkt_discard(skb);
1384 }
1385
1386 /*
1387  *      Allocate a dst for local (unicast / anycast) address.
1388  */
1389
1390 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1391                                     const struct in6_addr *addr,
1392                                     int anycast)
1393 {
1394         struct rt6_info *rt = ip6_dst_alloc();
1395
1396         if (rt == NULL)
1397                 return ERR_PTR(-ENOMEM);
1398
1399         dev_hold(&loopback_dev);
1400         in6_dev_hold(idev);
1401
1402         rt->u.dst.flags = DST_HOST;
1403         rt->u.dst.input = ip6_input;
1404         rt->u.dst.output = ip6_output;
1405         rt->rt6i_dev = &loopback_dev;
1406         rt->rt6i_idev = idev;
1407         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1408         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
1409         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
1410         rt->u.dst.obsolete = -1;
1411
1412         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1413         if (!anycast)
1414                 rt->rt6i_flags |= RTF_LOCAL;
1415         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1416         if (rt->rt6i_nexthop == NULL) {
1417                 dst_free((struct dst_entry *) rt);
1418                 return ERR_PTR(-ENOMEM);
1419         }
1420
1421         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1422         rt->rt6i_dst.plen = 128;
1423
1424         atomic_set(&rt->u.dst.__refcnt, 1);
1425
1426         return rt;
1427 }
1428
1429 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1430 {
1431         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1432             rt != &ip6_null_entry) {
1433                 RT6_TRACE("deleted by ifdown %p\n", rt);
1434                 return -1;
1435         }
1436         return 0;
1437 }
1438
1439 void rt6_ifdown(struct net_device *dev)
1440 {
1441         write_lock_bh(&rt6_lock);
1442         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1443         write_unlock_bh(&rt6_lock);
1444 }
1445
/* Argument bundle handed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* its new MTU */
};
1451
1452 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1453 {
1454         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1455         struct inet6_dev *idev;
1456
1457         /* In IPv6 pmtu discovery is not optional,
1458            so that RTAX_MTU lock cannot disable it.
1459            We still use this lock to block changes
1460            caused by addrconf/ndisc.
1461         */
1462
1463         idev = __in6_dev_get(arg->dev);
1464         if (idev == NULL)
1465                 return 0;
1466
1467         /* For administrative MTU increase, there is no way to discover
1468            IPv6 PMTU increase, so PMTU increase should be updated here.
1469            Since RFC 1981 doesn't include administrative MTU increase
1470            update PMTU increase is a MUST. (i.e. jumbo frame)
1471          */
1472         /*
1473            If new MTU is less than route PMTU, this new MTU will be the
1474            lowest MTU in the path, update the route PMTU to reflect PMTU
1475            decreases; if new MTU is greater than route PMTU, and the
1476            old MTU is the lowest MTU in the path, update the route PMTU
1477            to reflect the increase. In this case if the other nodes' MTU
1478            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1479            PMTU discouvery.
1480          */
1481         if (rt->rt6i_dev == arg->dev &&
1482             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1483             (dst_pmtu(&rt->u.dst) > arg->mtu ||
1484              (dst_pmtu(&rt->u.dst) < arg->mtu &&
1485               dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
1486                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1487         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1488         return 0;
1489 }
1490
1491 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1492 {
1493         struct rt6_mtu_change_arg arg;
1494
1495         arg.dev = dev;
1496         arg.mtu = mtu;
1497         read_lock_bh(&rt6_lock);
1498         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1499         read_unlock_bh(&rt6_lock);
1500 }
1501
1502 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1503                               struct in6_rtmsg *rtmsg)
1504 {
1505         memset(rtmsg, 0, sizeof(*rtmsg));
1506
1507         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1508         rtmsg->rtmsg_src_len = r->rtm_src_len;
1509         rtmsg->rtmsg_flags = RTF_UP;
1510         if (r->rtm_type == RTN_UNREACHABLE)
1511                 rtmsg->rtmsg_flags |= RTF_REJECT;
1512
1513         if (rta[RTA_GATEWAY-1]) {
1514                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1515                         return -EINVAL;
1516                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1517                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1518         }
1519         if (rta[RTA_DST-1]) {
1520                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1521                         return -EINVAL;
1522                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1523         }
1524         if (rta[RTA_SRC-1]) {
1525                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1526                         return -EINVAL;
1527                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1528         }
1529         if (rta[RTA_OIF-1]) {
1530                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1531                         return -EINVAL;
1532                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1533         }
1534         if (rta[RTA_PRIORITY-1]) {
1535                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1536                         return -EINVAL;
1537                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1538         }
1539         return 0;
1540 }
1541
1542 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1543 {
1544         struct rtmsg *r = NLMSG_DATA(nlh);
1545         struct in6_rtmsg rtmsg;
1546
1547         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1548                 return -EINVAL;
1549         return ip6_route_del(&rtmsg, nlh, arg);
1550 }
1551
1552 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1553 {
1554         struct rtmsg *r = NLMSG_DATA(nlh);
1555         struct in6_rtmsg rtmsg;
1556
1557         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1558                 return -EINVAL;
1559         return ip6_route_add(&rtmsg, nlh, arg);
1560 }
1561
/* Per-call context for a routing table dump, reached through the walker's args. */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* message buffer being filled */
	struct netlink_callback *cb;	/* dump callback state of the request */
};
1567
1568 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1569                          struct in6_addr *dst,
1570                          struct in6_addr *src,
1571                          int iif,
1572                          int type, u32 pid, u32 seq,
1573                          struct nlmsghdr *in_nlh, int prefix)
1574 {
1575         struct rtmsg *rtm;
1576         struct nlmsghdr  *nlh;
1577         unsigned char    *b = skb->tail;
1578         struct rta_cacheinfo ci;
1579
1580         if (prefix) {   /* user wants prefix routes only */
1581                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1582                         /* success since this is not a prefix route */
1583                         return 1;
1584                 }
1585         }
1586
1587         if (!pid && in_nlh) {
1588                 pid = in_nlh->nlmsg_pid;
1589         }
1590
1591         nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1592         rtm = NLMSG_DATA(nlh);
1593         rtm->rtm_family = AF_INET6;
1594         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1595         rtm->rtm_src_len = rt->rt6i_src.plen;
1596         rtm->rtm_tos = 0;
1597         rtm->rtm_table = RT_TABLE_MAIN;
1598         if (rt->rt6i_flags&RTF_REJECT)
1599                 rtm->rtm_type = RTN_UNREACHABLE;
1600         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1601                 rtm->rtm_type = RTN_LOCAL;
1602         else
1603                 rtm->rtm_type = RTN_UNICAST;
1604         rtm->rtm_flags = 0;
1605         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1606         rtm->rtm_protocol = rt->rt6i_protocol;
1607         if (rt->rt6i_flags&RTF_DYNAMIC)
1608                 rtm->rtm_protocol = RTPROT_REDIRECT;
1609         else if (rt->rt6i_flags & RTF_ADDRCONF)
1610                 rtm->rtm_protocol = RTPROT_KERNEL;
1611         else if (rt->rt6i_flags&RTF_DEFAULT)
1612                 rtm->rtm_protocol = RTPROT_RA;
1613
1614         if (rt->rt6i_flags&RTF_CACHE)
1615                 rtm->rtm_flags |= RTM_F_CLONED;
1616
1617         if (dst) {
1618                 RTA_PUT(skb, RTA_DST, 16, dst);
1619                 rtm->rtm_dst_len = 128;
1620         } else if (rtm->rtm_dst_len)
1621                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1622 #ifdef CONFIG_IPV6_SUBTREES
1623         if (src) {
1624                 RTA_PUT(skb, RTA_SRC, 16, src);
1625                 rtm->rtm_src_len = 128;
1626         } else if (rtm->rtm_src_len)
1627                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1628 #endif
1629         if (iif)
1630                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1631         else if (dst) {
1632                 struct in6_addr saddr_buf;
1633                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1634                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1635         }
1636         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1637                 goto rtattr_failure;
1638         if (rt->u.dst.neighbour)
1639                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1640         if (rt->u.dst.dev)
1641                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1642         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1643         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1644         if (rt->rt6i_expires)
1645                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1646         else
1647                 ci.rta_expires = 0;
1648         ci.rta_used = rt->u.dst.__use;
1649         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1650         ci.rta_error = rt->u.dst.error;
1651         ci.rta_id = 0;
1652         ci.rta_ts = 0;
1653         ci.rta_tsage = 0;
1654         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1655         nlh->nlmsg_len = skb->tail - b;
1656         return skb->len;
1657
1658 nlmsg_failure:
1659 rtattr_failure:
1660         skb_trim(skb, b - skb->data);
1661         return -1;
1662 }
1663
1664 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1665 {
1666         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1667         int prefix;
1668
1669         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1670                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1671                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1672         } else
1673                 prefix = 0;
1674
1675         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1676                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1677                      NULL, prefix);
1678 }
1679
1680 static int fib6_dump_node(struct fib6_walker_t *w)
1681 {
1682         int res;
1683         struct rt6_info *rt;
1684
1685         for (rt = w->leaf; rt; rt = rt->u.next) {
1686                 res = rt6_dump_route(rt, w->args);
1687                 if (res < 0) {
1688                         /* Frame is full, suspend walking */
1689                         w->leaf = rt;
1690                         return 1;
1691                 }
1692                 BUG_TRAP(res!=0);
1693         }
1694         w->leaf = NULL;
1695         return 0;
1696 }
1697
1698 static void fib6_dump_end(struct netlink_callback *cb)
1699 {
1700         struct fib6_walker_t *w = (void*)cb->args[0];
1701
1702         if (w) {
1703                 cb->args[0] = 0;
1704                 fib6_walker_unlink(w);
1705                 kfree(w);
1706         }
1707         if (cb->args[1]) {
1708                 cb->done = (void*)cb->args[1];
1709                 cb->args[1] = 0;
1710         }
1711 }
1712
1713 static int fib6_dump_done(struct netlink_callback *cb)
1714 {
1715         fib6_dump_end(cb);
1716         return cb->done(cb);
1717 }
1718
1719 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1720 {
1721         struct rt6_rtnl_dump_arg arg;
1722         struct fib6_walker_t *w;
1723         int res;
1724
1725         arg.skb = skb;
1726         arg.cb = cb;
1727
1728         w = (void*)cb->args[0];
1729         if (w == NULL) {
1730                 /* New dump:
1731                  * 
1732                  * 1. hook callback destructor.
1733                  */
1734                 cb->args[1] = (long)cb->done;
1735                 cb->done = fib6_dump_done;
1736
1737                 /*
1738                  * 2. allocate and initialize walker.
1739                  */
1740                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1741                 if (w == NULL)
1742                         return -ENOMEM;
1743                 RT6_TRACE("dump<%p", w);
1744                 memset(w, 0, sizeof(*w));
1745                 w->root = &ip6_routing_table;
1746                 w->func = fib6_dump_node;
1747                 w->args = &arg;
1748                 cb->args[0] = (long)w;
1749                 read_lock_bh(&rt6_lock);
1750                 res = fib6_walk(w);
1751                 read_unlock_bh(&rt6_lock);
1752         } else {
1753                 w->args = &arg;
1754                 read_lock_bh(&rt6_lock);
1755                 res = fib6_walk_continue(w);
1756                 read_unlock_bh(&rt6_lock);
1757         }
1758 #if RT6_DEBUG >= 3
1759         if (res <= 0 && skb->len == 0)
1760                 RT6_TRACE("%p>dump end\n", w);
1761 #endif
1762         res = res < 0 ? res : skb->len;
1763         /* res < 0 is an error. (really, impossible)
1764            res == 0 means that dump is complete, but skb still can contain data.
1765            res > 0 dump is not complete, but frame is full.
1766          */
1767         /* Destroy walker, if dump of this table is complete. */
1768         if (res <= 0)
1769                 fib6_dump_end(cb);
1770         return res;
1771 }
1772
1773 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1774 {
1775         struct rtattr **rta = arg;
1776         int iif = 0;
1777         int err = -ENOBUFS;
1778         struct sk_buff *skb;
1779         struct flowi fl;
1780         struct rt6_info *rt;
1781
1782         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1783         if (skb == NULL)
1784                 goto out;
1785
1786         /* Reserve room for dummy headers, this skb can pass
1787            through good chunk of routing engine.
1788          */
1789         skb->mac.raw = skb->data;
1790         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1791
1792         memset(&fl, 0, sizeof(fl));
1793         if (rta[RTA_SRC-1])
1794                 ipv6_addr_copy(&fl.fl6_src,
1795                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1796         if (rta[RTA_DST-1])
1797                 ipv6_addr_copy(&fl.fl6_dst,
1798                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1799
1800         if (rta[RTA_IIF-1])
1801                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1802
1803         if (iif) {
1804                 struct net_device *dev;
1805                 dev = __dev_get_by_index(iif);
1806                 if (!dev) {
1807                         err = -ENODEV;
1808                         goto out_free;
1809                 }
1810         }
1811
1812         fl.oif = 0;
1813         if (rta[RTA_OIF-1])
1814                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1815
1816         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1817
1818         skb->dst = &rt->u.dst;
1819
1820         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1821         err = rt6_fill_node(skb, rt, 
1822                             &fl.fl6_dst, &fl.fl6_src,
1823                             iif,
1824                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1825                             nlh->nlmsg_seq, nlh, 0);
1826         if (err < 0) {
1827                 err = -EMSGSIZE;
1828                 goto out_free;
1829         }
1830
1831         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1832         if (err > 0)
1833                 err = 0;
1834 out:
1835         return err;
1836 out_free:
1837         kfree_skb(skb);
1838         goto out;       
1839 }
1840
1841 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1842 {
1843         struct sk_buff *skb;
1844         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1845
1846         skb = alloc_skb(size, gfp_any());
1847         if (!skb) {
1848                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1849                 return;
1850         }
1851         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1852                 kfree_skb(skb);
1853                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1854                 return;
1855         }
1856         NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1857         netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1858 }
1859
1860 /*
1861  *      /proc
1862  */
1863
1864 #ifdef CONFIG_PROC_FS
1865
/*
 * Length in bytes that rt6_info_route()/rt6_proc_info() assume for one
 * formatted route record when converting a file offset into a record
 * count and an offset within a record.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and its per-route callback. */
struct rt6_proc_arg {
	char *buffer;	/* where formatted records are written */
	int offset;	/* file offset requested by the reader */
	int length;	/* number of bytes requested by the reader */
	int skip;	/* records skipped so far to reach 'offset' */
	int len;	/* bytes written into 'buffer' so far */
};
1876
1877 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1878 {
1879         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1880         int i;
1881
1882         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1883                 arg->skip++;
1884                 return 0;
1885         }
1886
1887         if (arg->len >= arg->length)
1888                 return 0;
1889
1890         for (i=0; i<16; i++) {
1891                 sprintf(arg->buffer + arg->len, "%02x",
1892                         rt->rt6i_dst.addr.s6_addr[i]);
1893                 arg->len += 2;
1894         }
1895         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1896                             rt->rt6i_dst.plen);
1897
1898 #ifdef CONFIG_IPV6_SUBTREES
1899         for (i=0; i<16; i++) {
1900                 sprintf(arg->buffer + arg->len, "%02x",
1901                         rt->rt6i_src.addr.s6_addr[i]);
1902                 arg->len += 2;
1903         }
1904         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1905                             rt->rt6i_src.plen);
1906 #else
1907         sprintf(arg->buffer + arg->len,
1908                 "00000000000000000000000000000000 00 ");
1909         arg->len += 36;
1910 #endif
1911
1912         if (rt->rt6i_nexthop) {
1913                 for (i=0; i<16; i++) {
1914                         sprintf(arg->buffer + arg->len, "%02x",
1915                                 rt->rt6i_nexthop->primary_key[i]);
1916                         arg->len += 2;
1917                 }
1918         } else {
1919                 sprintf(arg->buffer + arg->len,
1920                         "00000000000000000000000000000000");
1921                 arg->len += 32;
1922         }
1923         arg->len += sprintf(arg->buffer + arg->len,
1924                             " %08x %08x %08x %08x %8s\n",
1925                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1926                             rt->u.dst.__use, rt->rt6i_flags, 
1927                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1928         return 0;
1929 }
1930
1931 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1932 {
1933         struct rt6_proc_arg arg;
1934         arg.buffer = buffer;
1935         arg.offset = offset;
1936         arg.length = length;
1937         arg.skip = 0;
1938         arg.len = 0;
1939
1940         read_lock_bh(&rt6_lock);
1941         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1942         read_unlock_bh(&rt6_lock);
1943
1944         *start = buffer;
1945         if (offset)
1946                 *start += offset % RT6_INFO_LEN;
1947
1948         arg.len -= offset % RT6_INFO_LEN;
1949
1950         if (arg.len > length)
1951                 arg.len = length;
1952         if (arg.len < 0)
1953                 arg.len = 0;
1954
1955         return arg.len;
1956 }
1957
1958 extern struct rt6_statistics rt6_stats;
1959
1960 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1961 {
1962         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1963                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1964                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1965                       rt6_stats.fib_rt_cache,
1966                       atomic_read(&ip6_dst_ops.entries),
1967                       rt6_stats.fib_discarded_routes);
1968
1969         return 0;
1970 }
1971
1972 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1973 {
1974         return single_open(file, rt6_stats_seq_show, NULL);
1975 }
1976
1977 static struct file_operations rt6_stats_seq_fops = {
1978         .owner   = THIS_MODULE,
1979         .open    = rt6_stats_seq_open,
1980         .read    = seq_read,
1981         .llseek  = seq_lseek,
1982         .release = single_release,
1983 };
1984 #endif  /* CONFIG_PROC_FS */
1985
1986 #ifdef CONFIG_SYSCTL
1987
1988 static int flush_delay;
1989
1990 static
1991 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1992                               void __user *buffer, size_t *lenp, loff_t *ppos)
1993 {
1994         if (write) {
1995                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1996                 if (flush_delay < 0)
1997                         flush_delay = 0;
1998                 fib6_run_gc((unsigned long)flush_delay);
1999                 return 0;
2000         } else
2001                 return -EINVAL;
2002 }
2003
2004 ctl_table ipv6_route_table[] = {
2005         {
2006                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
2007                 .procname       =       "flush",
2008                 .data           =       &flush_delay,
2009                 .maxlen         =       sizeof(int),
2010                 .mode           =       0644,
2011                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2012         },
2013         {
2014                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2015                 .procname       =       "gc_thresh",
2016                 .data           =       &ip6_dst_ops.gc_thresh,
2017                 .maxlen         =       sizeof(int),
2018                 .mode           =       0644,
2019                 .proc_handler   =       &proc_dointvec,
2020         },
2021         {
2022                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2023                 .procname       =       "max_size",
2024                 .data           =       &ip6_rt_max_size,
2025                 .maxlen         =       sizeof(int),
2026                 .mode           =       0644,
2027                 .proc_handler   =       &proc_dointvec,
2028         },
2029         {
2030                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2031                 .procname       =       "gc_min_interval",
2032                 .data           =       &ip6_rt_gc_min_interval,
2033                 .maxlen         =       sizeof(int),
2034                 .mode           =       0644,
2035                 .proc_handler   =       &proc_dointvec_jiffies,
2036                 .strategy       =       &sysctl_jiffies,
2037         },
2038         {
2039                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2040                 .procname       =       "gc_timeout",
2041                 .data           =       &ip6_rt_gc_timeout,
2042                 .maxlen         =       sizeof(int),
2043                 .mode           =       0644,
2044                 .proc_handler   =       &proc_dointvec_jiffies,
2045                 .strategy       =       &sysctl_jiffies,
2046         },
2047         {
2048                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2049                 .procname       =       "gc_interval",
2050                 .data           =       &ip6_rt_gc_interval,
2051                 .maxlen         =       sizeof(int),
2052                 .mode           =       0644,
2053                 .proc_handler   =       &proc_dointvec_jiffies,
2054                 .strategy       =       &sysctl_jiffies,
2055         },
2056         {
2057                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2058                 .procname       =       "gc_elasticity",
2059                 .data           =       &ip6_rt_gc_elasticity,
2060                 .maxlen         =       sizeof(int),
2061                 .mode           =       0644,
2062                 .proc_handler   =       &proc_dointvec_jiffies,
2063                 .strategy       =       &sysctl_jiffies,
2064         },
2065         {
2066                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2067                 .procname       =       "mtu_expires",
2068                 .data           =       &ip6_rt_mtu_expires,
2069                 .maxlen         =       sizeof(int),
2070                 .mode           =       0644,
2071                 .proc_handler   =       &proc_dointvec_jiffies,
2072                 .strategy       =       &sysctl_jiffies,
2073         },
2074         {
2075                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2076                 .procname       =       "min_adv_mss",
2077                 .data           =       &ip6_rt_min_advmss,
2078                 .maxlen         =       sizeof(int),
2079                 .mode           =       0644,
2080                 .proc_handler   =       &proc_dointvec_jiffies,
2081                 .strategy       =       &sysctl_jiffies,
2082         },
2083         { .ctl_name = 0 }
2084 };
2085
2086 #endif
2087
2088 void __init ip6_route_init(void)
2089 {
2090         struct proc_dir_entry *p;
2091
2092         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2093                                                      sizeof(struct rt6_info),
2094                                                      0, SLAB_HWCACHE_ALIGN,
2095                                                      NULL, NULL);
2096         if (!ip6_dst_ops.kmem_cachep)
2097                 panic("cannot create ip6_dst_cache");
2098
2099         fib6_init();
2100 #ifdef  CONFIG_PROC_FS
2101         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2102         if (p)
2103                 p->owner = THIS_MODULE;
2104
2105         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2106 #endif
2107 #ifdef CONFIG_XFRM
2108         xfrm6_init();
2109 #endif
2110 }
2111
2112 void __exit ip6_route_cleanup(void)
2113 {
2114 #ifdef CONFIG_PROC_FS
2115         proc_net_remove("ipv6_route");
2116         proc_net_remove("rt6_stats");
2117 #endif
2118 #ifdef CONFIG_XFRM
2119         xfrm6_fini();
2120 #endif
2121         rt6_ifdown(NULL);
2122         fib6_gc_cleanup();
2123         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2124 }