vserver 1.9.3
[linux-2.6.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
40
41 #ifdef  CONFIG_PROC_FS
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #endif
45
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56
57 #include <asm/uaccess.h>
58
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62
/*
 * Debug verbosity for this file.  Raise RT6_DEBUG to 3 (or more) to turn
 * RDBG() and RT6_TRACE() into printk() calls; below that both compile away.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
/* RDBG() takes a parenthesised printk argument list: RDBG(("fmt", args)). */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
73
74
75 static int ip6_rt_max_size = 4096;
76 static int ip6_rt_gc_min_interval = HZ / 2;
77 static int ip6_rt_gc_timeout = 60*HZ;
78 int ip6_rt_gc_interval = 30*HZ;
79 static int ip6_rt_gc_elasticity = 9;
80 static int ip6_rt_mtu_expires = 10*60*HZ;
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void             ip6_dst_destroy(struct dst_entry *);
87 static void             ip6_dst_ifdown(struct dst_entry *, int how);
88 static int               ip6_dst_gc(void);
89
90 static int              ip6_pkt_discard(struct sk_buff *skb);
91 static int              ip6_pkt_discard_out(struct sk_buff **pskb);
92 static void             ip6_link_failure(struct sk_buff *skb);
93 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
94
95 static struct dst_ops ip6_dst_ops = {
96         .family                 =       AF_INET6,
97         .protocol               =       __constant_htons(ETH_P_IPV6),
98         .gc                     =       ip6_dst_gc,
99         .gc_thresh              =       1024,
100         .check                  =       ip6_dst_check,
101         .destroy                =       ip6_dst_destroy,
102         .ifdown                 =       ip6_dst_ifdown,
103         .negative_advice        =       ip6_negative_advice,
104         .link_failure           =       ip6_link_failure,
105         .update_pmtu            =       ip6_rt_update_pmtu,
106         .entry_size             =       sizeof(struct rt6_info),
107 };
108
109 struct rt6_info ip6_null_entry = {
110         .u = {
111                 .dst = {
112                         .__refcnt       = ATOMIC_INIT(1),
113                         .__use          = 1,
114                         .dev            = &loopback_dev,
115                         .obsolete       = -1,
116                         .error          = -ENETUNREACH,
117                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
118                         .input          = ip6_pkt_discard,
119                         .output         = ip6_pkt_discard_out,
120                         .ops            = &ip6_dst_ops,
121                         .path           = (struct dst_entry*)&ip6_null_entry,
122                 }
123         },
124         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
125         .rt6i_metric    = ~(u32) 0,
126         .rt6i_ref       = ATOMIC_INIT(1),
127 };
128
129 struct fib6_node ip6_routing_table = {
130         .leaf           = &ip6_null_entry,
131         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
132 };
133
134 /* Protects all the ip6 fib */
135
136 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
137
138
139 /* allocate dst with ip6_dst_ops */
140 static __inline__ struct rt6_info *ip6_dst_alloc(void)
141 {
142         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
143 }
144
145 static void ip6_dst_destroy(struct dst_entry *dst)
146 {
147         struct rt6_info *rt = (struct rt6_info *)dst;
148         struct inet6_dev *idev = rt->rt6i_idev;
149
150         if (idev != NULL) {
151                 rt->rt6i_idev = NULL;
152                 in6_dev_put(idev);
153         }       
154 }
155
156 static void ip6_dst_ifdown(struct dst_entry *dst, int how)
157 {
158         struct rt6_info *rt = (struct rt6_info *)dst;
159         struct inet6_dev *idev = rt->rt6i_idev;
160
161         if (idev != NULL && idev->dev != &loopback_dev) {
162                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
163                 if (loopback_idev != NULL) {
164                         rt->rt6i_idev = loopback_idev;
165                         in6_dev_put(idev);
166                 }
167         }
168 }
169
170 /*
171  *      Route lookup. Any rt6_lock is implied.
172  */
173
174 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
175                                                     int oif,
176                                                     int strict)
177 {
178         struct rt6_info *local = NULL;
179         struct rt6_info *sprt;
180
181         if (oif) {
182                 for (sprt = rt; sprt; sprt = sprt->u.next) {
183                         struct net_device *dev = sprt->rt6i_dev;
184                         if (dev->ifindex == oif)
185                                 return sprt;
186                         if (dev->flags & IFF_LOOPBACK) {
187                                 if (sprt->rt6i_idev == NULL ||
188                                     sprt->rt6i_idev->dev->ifindex != oif) {
189                                         if (strict && oif)
190                                                 continue;
191                                         if (local && (!oif || 
192                                                       local->rt6i_idev->dev->ifindex == oif))
193                                                 continue;
194                                 }
195                                 local = sprt;
196                         }
197                 }
198
199                 if (local)
200                         return local;
201
202                 if (strict)
203                         return &ip6_null_entry;
204         }
205         return rt;
206 }
207
208 /*
209  *      pointer to the last default router chosen. BH is disabled locally.
210  */
211 struct rt6_info *rt6_dflt_pointer;
212 spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
213
214 void rt6_reset_dflt_pointer(struct rt6_info *rt)
215 {
216         spin_lock_bh(&rt6_dflt_lock);
217         if (rt == NULL || rt == rt6_dflt_pointer) {
218                 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
219                 rt6_dflt_pointer = NULL;
220         }
221         spin_unlock_bh(&rt6_dflt_lock);
222 }
223
224 /* Default Router Selection (RFC 2461 6.3.6) */
225 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
226 {
227         struct rt6_info *match = NULL;
228         struct rt6_info *sprt;
229         int mpri = 0;
230
231         for (sprt = rt; sprt; sprt = sprt->u.next) {
232                 struct neighbour *neigh;
233                 int m = 0;
234
235                 if (!oif ||
236                     (sprt->rt6i_dev &&
237                      sprt->rt6i_dev->ifindex == oif))
238                         m += 8;
239
240                 if ((sprt->rt6i_flags & RTF_EXPIRES) &&
241                     time_after(jiffies, sprt->rt6i_expires))
242                         continue;
243
244                 if (sprt == rt6_dflt_pointer)
245                         m += 4;
246
247                 if ((neigh = sprt->rt6i_nexthop) != NULL) {
248                         read_lock_bh(&neigh->lock);
249                         switch (neigh->nud_state) {
250                         case NUD_REACHABLE:
251                                 m += 3;
252                                 break;
253
254                         case NUD_STALE:
255                         case NUD_DELAY:
256                         case NUD_PROBE:
257                                 m += 2;
258                                 break;
259
260                         case NUD_NOARP:
261                         case NUD_PERMANENT:
262                                 m += 1;
263                                 break;
264
265                         case NUD_INCOMPLETE:
266                         default:
267                                 read_unlock_bh(&neigh->lock);
268                                 continue;
269                         }
270                         read_unlock_bh(&neigh->lock);
271                 } else {
272                         continue;
273                 }
274
275                 if (m > mpri || m >= 12) {
276                         match = sprt;
277                         mpri = m;
278                         if (m >= 12) {
279                                 /* we choose the last default router if it
280                                  * is in (probably) reachable state.
281                                  * If route changed, we should do pmtu
282                                  * discovery. --yoshfuji
283                                  */
284                                 break;
285                         }
286                 }
287         }
288
289         spin_lock(&rt6_dflt_lock);
290         if (!match) {
291                 /*
292                  *      No default routers are known to be reachable.
293                  *      SHOULD round robin
294                  */
295                 if (rt6_dflt_pointer) {
296                         for (sprt = rt6_dflt_pointer->u.next;
297                              sprt; sprt = sprt->u.next) {
298                                 if (sprt->u.dst.obsolete <= 0 &&
299                                     sprt->u.dst.error == 0) {
300                                         match = sprt;
301                                         break;
302                                 }
303                         }
304                         for (sprt = rt;
305                              !match && sprt;
306                              sprt = sprt->u.next) {
307                                 if (sprt->u.dst.obsolete <= 0 &&
308                                     sprt->u.dst.error == 0) {
309                                         match = sprt;
310                                         break;
311                                 }
312                                 if (sprt == rt6_dflt_pointer)
313                                         break;
314                         }
315                 }
316         }
317
318         if (match) {
319                 if (rt6_dflt_pointer != match)
320                         RT6_TRACE("changed default router: %p->%p\n",
321                                   rt6_dflt_pointer, match);
322                 rt6_dflt_pointer = match;
323         }
324         spin_unlock(&rt6_dflt_lock);
325
326         if (!match) {
327                 /*
328                  * Last Resort: if no default routers found, 
329                  * use addrconf default route.
330                  * We don't record this route.
331                  */
332                 for (sprt = ip6_routing_table.leaf;
333                      sprt; sprt = sprt->u.next) {
334                         if ((sprt->rt6i_flags & RTF_DEFAULT) &&
335                             (!oif ||
336                              (sprt->rt6i_dev &&
337                               sprt->rt6i_dev->ifindex == oif))) {
338                                 match = sprt;
339                                 break;
340                         }
341                 }
342                 if (!match) {
343                         /* no default route.  give up. */
344                         match = &ip6_null_entry;
345                 }
346         }
347
348         return match;
349 }
350
351 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
352                             int oif, int strict)
353 {
354         struct fib6_node *fn;
355         struct rt6_info *rt;
356
357         read_lock_bh(&rt6_lock);
358         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
359         rt = rt6_device_match(fn->leaf, oif, strict);
360         dst_hold(&rt->u.dst);
361         rt->u.dst.__use++;
362         read_unlock_bh(&rt6_lock);
363
364         rt->u.dst.lastuse = jiffies;
365         if (rt->u.dst.error == 0)
366                 return rt;
367         dst_release(&rt->u.dst);
368         return NULL;
369 }
370
371 /* ip6_ins_rt is called with FREE rt6_lock.
372    It takes new route entry, the addition fails by any reason the
373    route is freed. In any case, if caller does not hold it, it may
374    be destroyed.
375  */
376
377 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
378 {
379         int err;
380
381         write_lock_bh(&rt6_lock);
382         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
383         write_unlock_bh(&rt6_lock);
384
385         return err;
386 }
387
388 /* No rt6_lock! If COW failed, the function returns dead route entry
389    with dst->error set to errno value.
390  */
391
392 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
393                                 struct in6_addr *saddr)
394 {
395         int err;
396         struct rt6_info *rt;
397
398         /*
399          *      Clone the route.
400          */
401
402         rt = ip6_rt_copy(ort);
403
404         if (rt) {
405                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
406
407                 if (!(rt->rt6i_flags&RTF_GATEWAY))
408                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
409
410                 rt->rt6i_dst.plen = 128;
411                 rt->rt6i_flags |= RTF_CACHE;
412                 rt->u.dst.flags |= DST_HOST;
413
414 #ifdef CONFIG_IPV6_SUBTREES
415                 if (rt->rt6i_src.plen && saddr) {
416                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
417                         rt->rt6i_src.plen = 128;
418                 }
419 #endif
420
421                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
422
423                 dst_hold(&rt->u.dst);
424
425                 err = ip6_ins_rt(rt, NULL, NULL);
426                 if (err == 0)
427                         return rt;
428
429                 rt->u.dst.error = err;
430
431                 return rt;
432         }
433         dst_hold(&ip6_null_entry.u.dst);
434         return &ip6_null_entry;
435 }
436
/*
 * BACKTRACK() - climb the FIB towards the root after a strict lookup miss.
 *
 * Only acts when the current result is the null entry and the lookup is
 * strict.  It walks fn->parent upwards:
 *   - reaching a root node: take a reference on rt and jump to "out";
 *   - reaching a node that carries route info: jump to "restart".
 * If the parent chain ends without either, execution simply continues.
 *
 * The macro relies on the enclosing function providing the locals "rt",
 * "fn" and "strict" and the labels "out" and "restart".
 *
 * It is wrapped in do { } while (0) so that "BACKTRACK();" is a single
 * statement and stays safe inside unbraced if/else bodies.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
448
449
450 void ip6_route_input(struct sk_buff *skb)
451 {
452         struct fib6_node *fn;
453         struct rt6_info *rt;
454         int strict;
455         int attempts = 3;
456
457         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
458
459 relookup:
460         read_lock_bh(&rt6_lock);
461
462         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
463                          &skb->nh.ipv6h->saddr);
464
465 restart:
466         rt = fn->leaf;
467
468         if ((rt->rt6i_flags & RTF_CACHE)) {
469                 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
470                 BACKTRACK();
471                 dst_hold(&rt->u.dst);
472                 goto out;
473         }
474
475         rt = rt6_device_match(rt, skb->dev->ifindex, 0);
476         BACKTRACK();
477
478         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
479                 read_unlock_bh(&rt6_lock);
480
481                 rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
482                              &skb->nh.ipv6h->saddr);
483                         
484                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
485                         goto out2;
486                 /* Race condition! In the gap, when rt6_lock was
487                    released someone could insert this route.  Relookup.
488                 */
489                 dst_release(&rt->u.dst);
490                 goto relookup;
491         }
492         dst_hold(&rt->u.dst);
493
494 out:
495         read_unlock_bh(&rt6_lock);
496 out2:
497         rt->u.dst.lastuse = jiffies;
498         rt->u.dst.__use++;
499         skb->dst = (struct dst_entry *) rt;
500 }
501
502 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
503 {
504         struct fib6_node *fn;
505         struct rt6_info *rt;
506         int strict;
507         int attempts = 3;
508
509         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
510
511 relookup:
512         read_lock_bh(&rt6_lock);
513
514         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
515
516 restart:
517         rt = fn->leaf;
518
519         if ((rt->rt6i_flags & RTF_CACHE)) {
520                 rt = rt6_device_match(rt, fl->oif, strict);
521                 BACKTRACK();
522                 dst_hold(&rt->u.dst);
523                 goto out;
524         }
525         if (rt->rt6i_flags & RTF_DEFAULT) {
526                 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
527                         rt = rt6_best_dflt(rt, fl->oif);
528         } else {
529                 rt = rt6_device_match(rt, fl->oif, strict);
530                 BACKTRACK();
531         }
532
533         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
534                 read_unlock_bh(&rt6_lock);
535
536                 rt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
537
538                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
539                         goto out2;
540
541                 /* Race condition! In the gap, when rt6_lock was
542                    released someone could insert this route.  Relookup.
543                 */
544                 dst_release(&rt->u.dst);
545                 goto relookup;
546         }
547         dst_hold(&rt->u.dst);
548
549 out:
550         read_unlock_bh(&rt6_lock);
551 out2:
552         rt->u.dst.lastuse = jiffies;
553         rt->u.dst.__use++;
554         return &rt->u.dst;
555 }
556
557
558 /*
559  *      Destination cache support functions
560  */
561
562 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
563 {
564         struct rt6_info *rt;
565
566         rt = (struct rt6_info *) dst;
567
568         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
569                 return dst;
570
571         dst_release(dst);
572         return NULL;
573 }
574
575 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
576 {
577         struct rt6_info *rt = (struct rt6_info *) dst;
578
579         if (rt) {
580                 if (rt->rt6i_flags & RTF_CACHE)
581                         ip6_del_rt(rt, NULL, NULL);
582                 else
583                         dst_release(dst);
584         }
585         return NULL;
586 }
587
588 static void ip6_link_failure(struct sk_buff *skb)
589 {
590         struct rt6_info *rt;
591
592         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
593
594         rt = (struct rt6_info *) skb->dst;
595         if (rt) {
596                 if (rt->rt6i_flags&RTF_CACHE) {
597                         dst_set_expires(&rt->u.dst, 0);
598                         rt->rt6i_flags |= RTF_EXPIRES;
599                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
600                         rt->rt6i_node->fn_sernum = -1;
601         }
602 }
603
604 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
605 {
606         struct rt6_info *rt6 = (struct rt6_info*)dst;
607
608         if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
609                 rt6->rt6i_flags |= RTF_MODIFIED;
610                 if (mtu < IPV6_MIN_MTU)
611                         mtu = IPV6_MIN_MTU;
612                 dst->metrics[RTAX_MTU-1] = mtu;
613         }
614 }
615
static int ipv6_get_mtu(struct net_device *dev);

/* List of dsts created by ndisc_dst_alloc().  Protected by rt6_lock.  */
static struct dst_entry *ndisc_dst_gc_list;
619
620 static inline unsigned int ipv6_advmss(unsigned int mtu)
621 {
622         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
623
624         if (mtu < ip6_rt_min_advmss)
625                 mtu = ip6_rt_min_advmss;
626
627         /*
628          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
629          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
630          * IPV6_MAXPLEN is also valid and means: "any MSS, 
631          * rely only on pmtu discovery"
632          */
633         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
634                 mtu = IPV6_MAXPLEN;
635         return mtu;
636 }
637
638 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
639                                   struct neighbour *neigh,
640                                   struct in6_addr *addr,
641                                   int (*output)(struct sk_buff **))
642 {
643         struct rt6_info *rt;
644         struct inet6_dev *idev = in6_dev_get(dev);
645
646         if (unlikely(idev == NULL))
647                 return NULL;
648
649         rt = ip6_dst_alloc();
650         if (unlikely(rt == NULL))
651                 goto out;
652
653         dev_hold(dev);
654         if (neigh)
655                 neigh_hold(neigh);
656         else
657                 neigh = ndisc_get_neigh(dev, addr);
658
659         rt->rt6i_dev      = dev;
660         rt->rt6i_idev     = idev;
661         rt->rt6i_nexthop  = neigh;
662         atomic_set(&rt->u.dst.__refcnt, 1);
663         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
664         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
665         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
666         rt->u.dst.output  = output;
667
668 #if 0   /* there's no chance to use these for ndisc */
669         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
670                                 ? DST_HOST 
671                                 : 0;
672         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
673         rt->rt6i_dst.plen = 128;
674 #endif
675
676         write_lock_bh(&rt6_lock);
677         rt->u.dst.next = ndisc_dst_gc_list;
678         ndisc_dst_gc_list = &rt->u.dst;
679         write_unlock_bh(&rt6_lock);
680
681         fib6_force_start_gc();
682
683 out:
684         return (struct dst_entry *)rt;
685 }
686
687 int ndisc_dst_gc(int *more)
688 {
689         struct dst_entry *dst, *next, **pprev;
690         int freed;
691
692         next = NULL;
693         pprev = &ndisc_dst_gc_list;
694         freed = 0;
695         while ((dst = *pprev) != NULL) {
696                 if (!atomic_read(&dst->__refcnt)) {
697                         *pprev = dst->next;
698                         dst_free(dst);
699                         freed++;
700                 } else {
701                         pprev = &dst->next;
702                         (*more)++;
703                 }
704         }
705
706         return freed;
707 }
708
709 static int ip6_dst_gc(void)
710 {
711         static unsigned expire = 30*HZ;
712         static unsigned long last_gc;
713         unsigned long now = jiffies;
714
715         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
716             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
717                 goto out;
718
719         expire++;
720         fib6_run_gc(expire);
721         last_gc = now;
722         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
723                 expire = ip6_rt_gc_timeout>>1;
724
725 out:
726         expire -= expire>>ip6_rt_gc_elasticity;
727         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
728 }
729
730 /* Clean host part of a prefix. Not necessary in radix tree,
731    but results in cleaner routing tables.
732
733    Remove it only when all the things will work!
734  */
735
736 static int ipv6_get_mtu(struct net_device *dev)
737 {
738         int mtu = IPV6_MIN_MTU;
739         struct inet6_dev *idev;
740
741         idev = in6_dev_get(dev);
742         if (idev) {
743                 mtu = idev->cnf.mtu6;
744                 in6_dev_put(idev);
745         }
746         return mtu;
747 }
748
749 static int ipv6_get_hoplimit(struct net_device *dev)
750 {
751         int hoplimit = ipv6_devconf.hop_limit;
752         struct inet6_dev *idev;
753
754         idev = in6_dev_get(dev);
755         if (idev) {
756                 hoplimit = idev->cnf.hop_limit;
757                 in6_dev_put(idev);
758         }
759         return hoplimit;
760 }
761
762 /*
763  *
764  */
765
766 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
767 {
768         int err;
769         struct rtmsg *r;
770         struct rtattr **rta;
771         struct rt6_info *rt = NULL;
772         struct net_device *dev = NULL;
773         struct inet6_dev *idev = NULL;
774         int addr_type;
775
776         rta = (struct rtattr **) _rtattr;
777
778         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
779                 return -EINVAL;
780 #ifndef CONFIG_IPV6_SUBTREES
781         if (rtmsg->rtmsg_src_len)
782                 return -EINVAL;
783 #endif
784         if (rtmsg->rtmsg_ifindex) {
785                 err = -ENODEV;
786                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
787                 if (!dev)
788                         goto out;
789                 idev = in6_dev_get(dev);
790                 if (!idev)
791                         goto out;
792         }
793
794         if (rtmsg->rtmsg_metric == 0)
795                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
796
797         rt = ip6_dst_alloc();
798
799         if (rt == NULL)
800                 return -ENOMEM;
801
802         rt->u.dst.obsolete = -1;
803         rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
804         if (nlh && (r = NLMSG_DATA(nlh))) {
805                 rt->rt6i_protocol = r->rtm_protocol;
806         } else {
807                 rt->rt6i_protocol = RTPROT_BOOT;
808         }
809
810         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
811
812         if (addr_type & IPV6_ADDR_MULTICAST)
813                 rt->u.dst.input = ip6_mc_input;
814         else
815                 rt->u.dst.input = ip6_forward;
816
817         rt->u.dst.output = ip6_output;
818
819         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
820                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
821         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
822         if (rt->rt6i_dst.plen == 128)
823                rt->u.dst.flags = DST_HOST;
824
825 #ifdef CONFIG_IPV6_SUBTREES
826         ipv6_addr_prefix(&rt->rt6i_src.addr, 
827                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
828         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
829 #endif
830
831         rt->rt6i_metric = rtmsg->rtmsg_metric;
832
833         /* We cannot add true routes via loopback here,
834            they would result in kernel looping; promote them to reject routes
835          */
836         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
837             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
838                 /* hold loopback dev/idev if we haven't done so. */
839                 if (dev != &loopback_dev) {
840                         if (dev) {
841                                 dev_put(dev);
842                                 in6_dev_put(idev);
843                         }
844                         dev = &loopback_dev;
845                         dev_hold(dev);
846                         idev = in6_dev_get(dev);
847                         if (!idev) {
848                                 err = -ENODEV;
849                                 goto out;
850                         }
851                 }
852                 rt->u.dst.output = ip6_pkt_discard_out;
853                 rt->u.dst.input = ip6_pkt_discard;
854                 rt->u.dst.error = -ENETUNREACH;
855                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
856                 goto install_route;
857         }
858
859         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
860                 struct in6_addr *gw_addr;
861                 int gwa_type;
862
863                 gw_addr = &rtmsg->rtmsg_gateway;
864                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
865                 gwa_type = ipv6_addr_type(gw_addr);
866
867                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
868                         struct rt6_info *grt;
869
870                         /* IPv6 strictly inhibits using not link-local
871                            addresses as nexthop address.
872                            Otherwise, router will not able to send redirects.
873                            It is very good, but in some (rare!) circumstances
874                            (SIT, PtP, NBMA NOARP links) it is handy to allow
875                            some exceptions. --ANK
876                          */
877                         err = -EINVAL;
878                         if (!(gwa_type&IPV6_ADDR_UNICAST))
879                                 goto out;
880
881                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
882
883                         err = -EHOSTUNREACH;
884                         if (grt == NULL)
885                                 goto out;
886                         if (dev) {
887                                 if (dev != grt->rt6i_dev) {
888                                         dst_release(&grt->u.dst);
889                                         goto out;
890                                 }
891                         } else {
892                                 dev = grt->rt6i_dev;
893                                 idev = grt->rt6i_idev;
894                                 dev_hold(dev);
895                                 in6_dev_hold(grt->rt6i_idev);
896                         }
897                         if (!(grt->rt6i_flags&RTF_GATEWAY))
898                                 err = 0;
899                         dst_release(&grt->u.dst);
900
901                         if (err)
902                                 goto out;
903                 }
904                 err = -EINVAL;
905                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
906                         goto out;
907         }
908
909         err = -ENODEV;
910         if (dev == NULL)
911                 goto out;
912
913         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
914                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
915                 if (IS_ERR(rt->rt6i_nexthop)) {
916                         err = PTR_ERR(rt->rt6i_nexthop);
917                         rt->rt6i_nexthop = NULL;
918                         goto out;
919                 }
920         }
921
922         rt->rt6i_flags = rtmsg->rtmsg_flags;
923
924 install_route:
925         if (rta && rta[RTA_METRICS-1]) {
926                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
927                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
928
929                 while (RTA_OK(attr, attrlen)) {
930                         unsigned flavor = attr->rta_type;
931                         if (flavor) {
932                                 if (flavor > RTAX_MAX) {
933                                         err = -EINVAL;
934                                         goto out;
935                                 }
936                                 rt->u.dst.metrics[flavor-1] =
937                                         *(u32 *)RTA_DATA(attr);
938                         }
939                         attr = RTA_NEXT(attr, attrlen);
940                 }
941         }
942
943         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
944                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
945                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
946                                 IPV6_DEFAULT_MCASTHOPS;
947                 else
948                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
949                                 ipv6_get_hoplimit(dev);
950         }
951
952         if (!rt->u.dst.metrics[RTAX_MTU-1])
953                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
954         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
955                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
956         rt->u.dst.dev = dev;
957         rt->rt6i_idev = idev;
958         return ip6_ins_rt(rt, nlh, _rtattr);
959
960 out:
961         if (dev)
962                 dev_put(dev);
963         dst_free((struct dst_entry *) rt);
964         return err;
965 }
966
967 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
968 {
969         int err;
970
971         write_lock_bh(&rt6_lock);
972
973         rt6_reset_dflt_pointer(NULL);
974
975         dst_release(&rt->u.dst);
976
977         err = fib6_del(rt, nlh, _rtattr);
978         write_unlock_bh(&rt6_lock);
979
980         return err;
981 }
982
983 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
984 {
985         struct fib6_node *fn;
986         struct rt6_info *rt;
987         int err = -ESRCH;
988
989         read_lock_bh(&rt6_lock);
990
991         fn = fib6_locate(&ip6_routing_table,
992                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
993                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
994         
995         if (fn) {
996                 for (rt = fn->leaf; rt; rt = rt->u.next) {
997                         if (rtmsg->rtmsg_ifindex &&
998                             (rt->rt6i_dev == NULL ||
999                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1000                                 continue;
1001                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1002                             ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1003                                 continue;
1004                         if (rtmsg->rtmsg_metric &&
1005                             rtmsg->rtmsg_metric != rt->rt6i_metric)
1006                                 continue;
1007                         dst_hold(&rt->u.dst);
1008                         read_unlock_bh(&rt6_lock);
1009
1010                         return ip6_del_rt(rt, nlh, _rtattr);
1011                 }
1012         }
1013         read_unlock_bh(&rt6_lock);
1014
1015         return err;
1016 }
1017
1018 /*
1019  *      Handle redirects
1020  */
1021 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1022                   struct neighbour *neigh, u8 *lladdr, int on_link)
1023 {
1024         struct rt6_info *rt, *nrt;
1025
1026         /* Locate old route to this destination. */
1027         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1028
1029         if (rt == NULL)
1030                 return;
1031
1032         if (neigh->dev != rt->rt6i_dev)
1033                 goto out;
1034
1035         /*
1036          * Current route is on-link; redirect is always invalid.
1037          * 
1038          * Seems, previous statement is not true. It could
1039          * be node, which looks for us as on-link (f.e. proxy ndisc)
1040          * But then router serving it might decide, that we should
1041          * know truth 8)8) --ANK (980726).
1042          */
1043         if (!(rt->rt6i_flags&RTF_GATEWAY))
1044                 goto out;
1045
1046         /*
1047          *      RFC 2461 specifies that redirects should only be
1048          *      accepted if they come from the nexthop to the target.
1049          *      Due to the way default routers are chosen, this notion
1050          *      is a bit fuzzy and one might need to check all default
1051          *      routers.
1052          */
1053         if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
1054                 if (rt->rt6i_flags & RTF_DEFAULT) {
1055                         struct rt6_info *rt1;
1056
1057                         read_lock(&rt6_lock);
1058                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1059                                 if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
1060                                         dst_hold(&rt1->u.dst);
1061                                         dst_release(&rt->u.dst);
1062                                         read_unlock(&rt6_lock);
1063                                         rt = rt1;
1064                                         goto source_ok;
1065                                 }
1066                         }
1067                         read_unlock(&rt6_lock);
1068                 }
1069                 if (net_ratelimit())
1070                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1071                                "for redirect target\n");
1072                 goto out;
1073         }
1074
1075 source_ok:
1076
1077         /*
1078          *      We have finally decided to accept it.
1079          */
1080
1081         neigh_update(neigh, lladdr, NUD_STALE, 
1082                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1083                      NEIGH_UPDATE_F_OVERRIDE|
1084                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1085                                      NEIGH_UPDATE_F_ISROUTER))
1086                      );
1087
1088         /*
1089          * Redirect received -> path was valid.
1090          * Look, redirects are sent only in response to data packets,
1091          * so that this nexthop apparently is reachable. --ANK
1092          */
1093         dst_confirm(&rt->u.dst);
1094
1095         /* Duplicate redirect: silently ignore. */
1096         if (neigh == rt->u.dst.neighbour)
1097                 goto out;
1098
1099         nrt = ip6_rt_copy(rt);
1100         if (nrt == NULL)
1101                 goto out;
1102
1103         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1104         if (on_link)
1105                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1106
1107         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1108         nrt->rt6i_dst.plen = 128;
1109         nrt->u.dst.flags |= DST_HOST;
1110
1111         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1112         nrt->rt6i_nexthop = neigh_clone(neigh);
1113         /* Reset pmtu, it may be better */
1114         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1115         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
1116
1117         if (ip6_ins_rt(nrt, NULL, NULL))
1118                 goto out;
1119
1120         if (rt->rt6i_flags&RTF_CACHE) {
1121                 ip6_del_rt(rt, NULL, NULL);
1122                 return;
1123         }
1124
1125 out:
1126         dst_release(&rt->u.dst);
1127         return;
1128 }
1129
1130 /*
1131  *      Handle ICMP "packet too big" messages
1132  *      i.e. Path MTU discovery
1133  */
1134
1135 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1136                         struct net_device *dev, u32 pmtu)
1137 {
1138         struct rt6_info *rt, *nrt;
1139
1140         if (pmtu < IPV6_MIN_MTU) {
1141                 if (net_ratelimit())
1142                         printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1143                                pmtu);
1144                 /* According to RFC1981, the PMTU is set to the IPv6 minimum
1145                    link MTU if the node receives a Packet Too Big message
1146                    reporting next-hop MTU that is less than the IPv6 minimum MTU.
1147                    */
1148                 pmtu = IPV6_MIN_MTU;
1149         }
1150
1151         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1152
1153         if (rt == NULL)
1154                 return;
1155
1156         if (pmtu >= dst_pmtu(&rt->u.dst))
1157                 goto out;
1158
1159         /* New mtu received -> path was valid.
1160            They are sent only in response to data packets,
1161            so that this nexthop apparently is reachable. --ANK
1162          */
1163         dst_confirm(&rt->u.dst);
1164
1165         /* Host route. If it is static, it would be better
1166            not to override it, but add new one, so that
1167            when cache entry will expire old pmtu
1168            would return automatically.
1169          */
1170         if (rt->rt6i_flags & RTF_CACHE) {
1171                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1172                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1173                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1174                 goto out;
1175         }
1176
1177         /* Network route.
1178            Two cases are possible:
1179            1. It is connected route. Action: COW
1180            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1181          */
1182         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1183                 nrt = rt6_cow(rt, daddr, saddr);
1184                 if (!nrt->u.dst.error) {
1185                         nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1186                         /* According to RFC 1981, detecting PMTU increase shouldn't be
1187                            happened within 5 mins, the recommended timer is 10 mins.
1188                            Here this route expiration time is set to ip6_rt_mtu_expires
1189                            which is 10 mins. After 10 mins the decreased pmtu is expired
1190                            and detecting PMTU increase will be automatically happened.
1191                          */
1192                         dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1193                         nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1194                 }
1195                 dst_release(&nrt->u.dst);
1196         } else {
1197                 nrt = ip6_rt_copy(rt);
1198                 if (nrt == NULL)
1199                         goto out;
1200                 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1201                 nrt->rt6i_dst.plen = 128;
1202                 nrt->u.dst.flags |= DST_HOST;
1203                 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1204                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1205                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1206                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1207                 ip6_ins_rt(nrt, NULL, NULL);
1208         }
1209
1210 out:
1211         dst_release(&rt->u.dst);
1212 }
1213
1214 /*
1215  *      Misc support functions
1216  */
1217
1218 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1219 {
1220         struct rt6_info *rt = ip6_dst_alloc();
1221
1222         if (rt) {
1223                 rt->u.dst.input = ort->u.dst.input;
1224                 rt->u.dst.output = ort->u.dst.output;
1225
1226                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1227                 rt->u.dst.dev = ort->u.dst.dev;
1228                 if (rt->u.dst.dev)
1229                         dev_hold(rt->u.dst.dev);
1230                 rt->rt6i_idev = ort->rt6i_idev;
1231                 if (rt->rt6i_idev)
1232                         in6_dev_hold(rt->rt6i_idev);
1233                 rt->u.dst.lastuse = jiffies;
1234                 rt->rt6i_expires = 0;
1235
1236                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1237                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1238                 rt->rt6i_metric = 0;
1239
1240                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1241 #ifdef CONFIG_IPV6_SUBTREES
1242                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1243 #endif
1244         }
1245         return rt;
1246 }
1247
1248 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1249 {       
1250         struct rt6_info *rt;
1251         struct fib6_node *fn;
1252
1253         fn = &ip6_routing_table;
1254
1255         write_lock_bh(&rt6_lock);
1256         for (rt = fn->leaf; rt; rt=rt->u.next) {
1257                 if (dev == rt->rt6i_dev &&
1258                     ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1259                         break;
1260         }
1261         if (rt)
1262                 dst_hold(&rt->u.dst);
1263         write_unlock_bh(&rt6_lock);
1264         return rt;
1265 }
1266
1267 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1268                                      struct net_device *dev)
1269 {
1270         struct in6_rtmsg rtmsg;
1271
1272         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1273         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1274         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1275         rtmsg.rtmsg_metric = 1024;
1276         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1277
1278         rtmsg.rtmsg_ifindex = dev->ifindex;
1279
1280         ip6_route_add(&rtmsg, NULL, NULL);
1281         return rt6_get_dflt_router(gwaddr, dev);
1282 }
1283
1284 void rt6_purge_dflt_routers(int last_resort)
1285 {
1286         struct rt6_info *rt;
1287         u32 flags;
1288
1289         if (last_resort)
1290                 flags = RTF_ALLONLINK;
1291         else
1292                 flags = RTF_DEFAULT | RTF_ADDRCONF;     
1293
1294 restart:
1295         read_lock_bh(&rt6_lock);
1296         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1297                 if (rt->rt6i_flags & flags) {
1298                         dst_hold(&rt->u.dst);
1299
1300                         rt6_reset_dflt_pointer(NULL);
1301
1302                         read_unlock_bh(&rt6_lock);
1303
1304                         ip6_del_rt(rt, NULL, NULL);
1305
1306                         goto restart;
1307                 }
1308         }
1309         read_unlock_bh(&rt6_lock);
1310 }
1311
1312 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1313 {
1314         struct in6_rtmsg rtmsg;
1315         int err;
1316
1317         switch(cmd) {
1318         case SIOCADDRT:         /* Add a route */
1319         case SIOCDELRT:         /* Delete a route */
1320                 if (!capable(CAP_NET_ADMIN))
1321                         return -EPERM;
1322                 err = copy_from_user(&rtmsg, arg,
1323                                      sizeof(struct in6_rtmsg));
1324                 if (err)
1325                         return -EFAULT;
1326                         
1327                 rtnl_lock();
1328                 switch (cmd) {
1329                 case SIOCADDRT:
1330                         err = ip6_route_add(&rtmsg, NULL, NULL);
1331                         break;
1332                 case SIOCDELRT:
1333                         err = ip6_route_del(&rtmsg, NULL, NULL);
1334                         break;
1335                 default:
1336                         err = -EINVAL;
1337                 }
1338                 rtnl_unlock();
1339
1340                 return err;
1341         };
1342
1343         return -EINVAL;
1344 }
1345
1346 /*
1347  *      Drop the packet on the floor
1348  */
1349
1350 int ip6_pkt_discard(struct sk_buff *skb)
1351 {
1352         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1353         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1354         kfree_skb(skb);
1355         return 0;
1356 }
1357
1358 int ip6_pkt_discard_out(struct sk_buff **pskb)
1359 {
1360         (*pskb)->dev = (*pskb)->dst->dev;
1361         BUG_ON(!(*pskb)->dev);
1362         return ip6_pkt_discard(*pskb);
1363 }
1364
1365 /*
1366  *      Allocate a dst for local (unicast / anycast) address.
1367  */
1368
1369 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1370                                     const struct in6_addr *addr,
1371                                     int anycast)
1372 {
1373         struct rt6_info *rt = ip6_dst_alloc();
1374
1375         if (rt == NULL)
1376                 return ERR_PTR(-ENOMEM);
1377
1378         dev_hold(&loopback_dev);
1379         in6_dev_hold(idev);
1380
1381         rt->u.dst.flags = DST_HOST;
1382         rt->u.dst.input = ip6_input;
1383         rt->u.dst.output = ip6_output;
1384         rt->rt6i_dev = &loopback_dev;
1385         rt->rt6i_idev = idev;
1386         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1387         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
1388         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
1389         rt->u.dst.obsolete = -1;
1390
1391         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1392         if (!anycast)
1393                 rt->rt6i_flags |= RTF_LOCAL;
1394         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1395         if (rt->rt6i_nexthop == NULL) {
1396                 dst_free((struct dst_entry *) rt);
1397                 return ERR_PTR(-ENOMEM);
1398         }
1399
1400         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1401         rt->rt6i_dst.plen = 128;
1402
1403         atomic_set(&rt->u.dst.__refcnt, 1);
1404
1405         return rt;
1406 }
1407
1408 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1409 {
1410         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1411             rt != &ip6_null_entry) {
1412                 RT6_TRACE("deleted by ifdown %p\n", rt);
1413                 return -1;
1414         }
1415         return 0;
1416 }
1417
1418 void rt6_ifdown(struct net_device *dev)
1419 {
1420         write_lock_bh(&rt6_lock);
1421         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1422         write_unlock_bh(&rt6_lock);
1423 }
1424
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
        struct net_device *dev;         /* device whose MTU changed */
        unsigned int mtu;               /* the new MTU */
};
1430
1431 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1432 {
1433         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1434         struct inet6_dev *idev;
1435
1436         /* In IPv6 pmtu discovery is not optional,
1437            so that RTAX_MTU lock cannot disable it.
1438            We still use this lock to block changes
1439            caused by addrconf/ndisc.
1440         */
1441
1442         idev = __in6_dev_get(arg->dev);
1443         if (idev == NULL)
1444                 return 0;
1445
1446         /* For administrative MTU increase, there is no way to discover
1447            IPv6 PMTU increase, so PMTU increase should be updated here.
1448            Since RFC 1981 doesn't include administrative MTU increase
1449            update PMTU increase is a MUST. (i.e. jumbo frame)
1450          */
1451         /*
1452            If new MTU is less than route PMTU, this new MTU will be the
1453            lowest MTU in the path, update the route PMTU to reflect PMTU
1454            decreases; if new MTU is greater than route PMTU, and the
1455            old MTU is the lowest MTU in the path, update the route PMTU
1456            to reflect the increase. In this case if the other nodes' MTU
1457            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1458            PMTU discouvery.
1459          */
1460         if (rt->rt6i_dev == arg->dev &&
1461             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1462             (dst_pmtu(&rt->u.dst) > arg->mtu ||
1463              (dst_pmtu(&rt->u.dst) < arg->mtu &&
1464               dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
1465                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1466         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1467         return 0;
1468 }
1469
1470 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1471 {
1472         struct rt6_mtu_change_arg arg;
1473
1474         arg.dev = dev;
1475         arg.mtu = mtu;
1476         read_lock_bh(&rt6_lock);
1477         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1478         read_unlock_bh(&rt6_lock);
1479 }
1480
1481 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1482                               struct in6_rtmsg *rtmsg)
1483 {
1484         memset(rtmsg, 0, sizeof(*rtmsg));
1485
1486         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1487         rtmsg->rtmsg_src_len = r->rtm_src_len;
1488         rtmsg->rtmsg_flags = RTF_UP;
1489         if (r->rtm_type == RTN_UNREACHABLE)
1490                 rtmsg->rtmsg_flags |= RTF_REJECT;
1491
1492         if (rta[RTA_GATEWAY-1]) {
1493                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1494                         return -EINVAL;
1495                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1496                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1497         }
1498         if (rta[RTA_DST-1]) {
1499                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1500                         return -EINVAL;
1501                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1502         }
1503         if (rta[RTA_SRC-1]) {
1504                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1505                         return -EINVAL;
1506                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1507         }
1508         if (rta[RTA_OIF-1]) {
1509                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1510                         return -EINVAL;
1511                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1512         }
1513         if (rta[RTA_PRIORITY-1]) {
1514                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1515                         return -EINVAL;
1516                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1517         }
1518         return 0;
1519 }
1520
1521 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1522 {
1523         struct rtmsg *r = NLMSG_DATA(nlh);
1524         struct in6_rtmsg rtmsg;
1525
1526         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1527                 return -EINVAL;
1528         return ip6_route_del(&rtmsg, nlh, arg);
1529 }
1530
1531 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1532 {
1533         struct rtmsg *r = NLMSG_DATA(nlh);
1534         struct in6_rtmsg rtmsg;
1535
1536         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1537                 return -EINVAL;
1538         return ip6_route_add(&rtmsg, nlh, arg);
1539 }
1540
/* Per-call context that inet6_dump_fib() hands to the tree walker. */
struct rt6_rtnl_dump_arg {
        struct sk_buff *skb;            /* buffer the routes are dumped into */
        struct netlink_callback *cb;    /* netlink dump callback state */
};
1546
1547 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1548                          struct in6_addr *dst,
1549                          struct in6_addr *src,
1550                          int iif,
1551                          int type, u32 pid, u32 seq,
1552                          struct nlmsghdr *in_nlh, int prefix)
1553 {
1554         struct rtmsg *rtm;
1555         struct nlmsghdr  *nlh;
1556         unsigned char    *b = skb->tail;
1557         struct rta_cacheinfo ci;
1558
1559         if (prefix) {   /* user wants prefix routes only */
1560                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1561                         /* success since this is not a prefix route */
1562                         return 1;
1563                 }
1564         }
1565
1566         if (!pid && in_nlh) {
1567                 pid = in_nlh->nlmsg_pid;
1568         }
1569
1570         nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1571         rtm = NLMSG_DATA(nlh);
1572         rtm->rtm_family = AF_INET6;
1573         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1574         rtm->rtm_src_len = rt->rt6i_src.plen;
1575         rtm->rtm_tos = 0;
1576         rtm->rtm_table = RT_TABLE_MAIN;
1577         if (rt->rt6i_flags&RTF_REJECT)
1578                 rtm->rtm_type = RTN_UNREACHABLE;
1579         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1580                 rtm->rtm_type = RTN_LOCAL;
1581         else
1582                 rtm->rtm_type = RTN_UNICAST;
1583         rtm->rtm_flags = 0;
1584         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1585         rtm->rtm_protocol = rt->rt6i_protocol;
1586         if (rt->rt6i_flags&RTF_DYNAMIC)
1587                 rtm->rtm_protocol = RTPROT_REDIRECT;
1588         else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1589                 rtm->rtm_protocol = RTPROT_KERNEL;
1590         else if (rt->rt6i_flags&RTF_DEFAULT)
1591                 rtm->rtm_protocol = RTPROT_RA;
1592
1593         if (rt->rt6i_flags&RTF_CACHE)
1594                 rtm->rtm_flags |= RTM_F_CLONED;
1595
1596         if (dst) {
1597                 RTA_PUT(skb, RTA_DST, 16, dst);
1598                 rtm->rtm_dst_len = 128;
1599         } else if (rtm->rtm_dst_len)
1600                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1601 #ifdef CONFIG_IPV6_SUBTREES
1602         if (src) {
1603                 RTA_PUT(skb, RTA_SRC, 16, src);
1604                 rtm->rtm_src_len = 128;
1605         } else if (rtm->rtm_src_len)
1606                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1607 #endif
1608         if (iif)
1609                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1610         else if (dst) {
1611                 struct in6_addr saddr_buf;
1612                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1613                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1614         }
1615         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1616                 goto rtattr_failure;
1617         if (rt->u.dst.neighbour)
1618                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1619         if (rt->u.dst.dev)
1620                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1621         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1622         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1623         if (rt->rt6i_expires)
1624                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1625         else
1626                 ci.rta_expires = 0;
1627         ci.rta_used = rt->u.dst.__use;
1628         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1629         ci.rta_error = rt->u.dst.error;
1630         ci.rta_id = 0;
1631         ci.rta_ts = 0;
1632         ci.rta_tsage = 0;
1633         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1634         nlh->nlmsg_len = skb->tail - b;
1635         return skb->len;
1636
1637 nlmsg_failure:
1638 rtattr_failure:
1639         skb_trim(skb, b - skb->data);
1640         return -1;
1641 }
1642
1643 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1644 {
1645         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1646         int prefix;
1647
1648         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1649                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1650                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1651         } else
1652                 prefix = 0;
1653
1654         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1655                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1656                      NULL, prefix);
1657 }
1658
1659 static int fib6_dump_node(struct fib6_walker_t *w)
1660 {
1661         int res;
1662         struct rt6_info *rt;
1663
1664         for (rt = w->leaf; rt; rt = rt->u.next) {
1665                 res = rt6_dump_route(rt, w->args);
1666                 if (res < 0) {
1667                         /* Frame is full, suspend walking */
1668                         w->leaf = rt;
1669                         return 1;
1670                 }
1671                 BUG_TRAP(res!=0);
1672         }
1673         w->leaf = NULL;
1674         return 0;
1675 }
1676
1677 static void fib6_dump_end(struct netlink_callback *cb)
1678 {
1679         struct fib6_walker_t *w = (void*)cb->args[0];
1680
1681         if (w) {
1682                 cb->args[0] = 0;
1683                 fib6_walker_unlink(w);
1684                 kfree(w);
1685         }
1686         if (cb->args[1]) {
1687                 cb->done = (void*)cb->args[1];
1688                 cb->args[1] = 0;
1689         }
1690 }
1691
1692 static int fib6_dump_done(struct netlink_callback *cb)
1693 {
1694         fib6_dump_end(cb);
1695         return cb->done(cb);
1696 }
1697
1698 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1699 {
1700         struct rt6_rtnl_dump_arg arg;
1701         struct fib6_walker_t *w;
1702         int res;
1703
1704         arg.skb = skb;
1705         arg.cb = cb;
1706
1707         w = (void*)cb->args[0];
1708         if (w == NULL) {
1709                 /* New dump:
1710                  * 
1711                  * 1. hook callback destructor.
1712                  */
1713                 cb->args[1] = (long)cb->done;
1714                 cb->done = fib6_dump_done;
1715
1716                 /*
1717                  * 2. allocate and initialize walker.
1718                  */
1719                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1720                 if (w == NULL)
1721                         return -ENOMEM;
1722                 RT6_TRACE("dump<%p", w);
1723                 memset(w, 0, sizeof(*w));
1724                 w->root = &ip6_routing_table;
1725                 w->func = fib6_dump_node;
1726                 w->args = &arg;
1727                 cb->args[0] = (long)w;
1728                 read_lock_bh(&rt6_lock);
1729                 res = fib6_walk(w);
1730                 read_unlock_bh(&rt6_lock);
1731         } else {
1732                 w->args = &arg;
1733                 read_lock_bh(&rt6_lock);
1734                 res = fib6_walk_continue(w);
1735                 read_unlock_bh(&rt6_lock);
1736         }
1737 #if RT6_DEBUG >= 3
1738         if (res <= 0 && skb->len == 0)
1739                 RT6_TRACE("%p>dump end\n", w);
1740 #endif
1741         res = res < 0 ? res : skb->len;
1742         /* res < 0 is an error. (really, impossible)
1743            res == 0 means that dump is complete, but skb still can contain data.
1744            res > 0 dump is not complete, but frame is full.
1745          */
1746         /* Destroy walker, if dump of this table is complete. */
1747         if (res <= 0)
1748                 fib6_dump_end(cb);
1749         return res;
1750 }
1751
1752 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1753 {
1754         struct rtattr **rta = arg;
1755         int iif = 0;
1756         int err = -ENOBUFS;
1757         struct sk_buff *skb;
1758         struct flowi fl;
1759         struct rt6_info *rt;
1760
1761         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1762         if (skb == NULL)
1763                 goto out;
1764
1765         /* Reserve room for dummy headers, this skb can pass
1766            through good chunk of routing engine.
1767          */
1768         skb->mac.raw = skb->data;
1769         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1770
1771         memset(&fl, 0, sizeof(fl));
1772         if (rta[RTA_SRC-1])
1773                 ipv6_addr_copy(&fl.fl6_src,
1774                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1775         if (rta[RTA_DST-1])
1776                 ipv6_addr_copy(&fl.fl6_dst,
1777                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1778
1779         if (rta[RTA_IIF-1])
1780                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1781
1782         if (iif) {
1783                 struct net_device *dev;
1784                 dev = __dev_get_by_index(iif);
1785                 if (!dev) {
1786                         err = -ENODEV;
1787                         goto out_free;
1788                 }
1789         }
1790
1791         fl.oif = 0;
1792         if (rta[RTA_OIF-1])
1793                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1794
1795         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1796
1797         skb->dst = &rt->u.dst;
1798
1799         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1800         err = rt6_fill_node(skb, rt, 
1801                             &fl.fl6_dst, &fl.fl6_src,
1802                             iif,
1803                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1804                             nlh->nlmsg_seq, nlh, 0);
1805         if (err < 0) {
1806                 err = -EMSGSIZE;
1807                 goto out_free;
1808         }
1809
1810         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1811         if (err > 0)
1812                 err = 0;
1813 out:
1814         return err;
1815 out_free:
1816         kfree_skb(skb);
1817         goto out;       
1818 }
1819
1820 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1821 {
1822         struct sk_buff *skb;
1823         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1824
1825         skb = alloc_skb(size, gfp_any());
1826         if (!skb) {
1827                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1828                 return;
1829         }
1830         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1831                 kfree_skb(skb);
1832                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1833                 return;
1834         }
1835         NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1836         netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1837 }
1838
1839 /*
1840  *      /proc
1841  */
1842
1843 #ifdef CONFIG_PROC_FS
1844
/*
 * Nominal length in bytes of one formatted route record; the proc read
 * code uses it to convert a byte offset into "records to skip" plus an
 * offset into the first emitted record.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor state shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer records are formatted into */
	int offset;	/* byte offset requested by the reader */
	int length;	/* number of bytes requested by the reader */
	int skip;	/* records skipped so far */
	int len;	/* bytes written to buffer so far */
};
1855
1856 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1857 {
1858         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1859         int i;
1860
1861         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1862                 arg->skip++;
1863                 return 0;
1864         }
1865
1866         if (arg->len >= arg->length)
1867                 return 0;
1868
1869         for (i=0; i<16; i++) {
1870                 sprintf(arg->buffer + arg->len, "%02x",
1871                         rt->rt6i_dst.addr.s6_addr[i]);
1872                 arg->len += 2;
1873         }
1874         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1875                             rt->rt6i_dst.plen);
1876
1877 #ifdef CONFIG_IPV6_SUBTREES
1878         for (i=0; i<16; i++) {
1879                 sprintf(arg->buffer + arg->len, "%02x",
1880                         rt->rt6i_src.addr.s6_addr[i]);
1881                 arg->len += 2;
1882         }
1883         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1884                             rt->rt6i_src.plen);
1885 #else
1886         sprintf(arg->buffer + arg->len,
1887                 "00000000000000000000000000000000 00 ");
1888         arg->len += 36;
1889 #endif
1890
1891         if (rt->rt6i_nexthop) {
1892                 for (i=0; i<16; i++) {
1893                         sprintf(arg->buffer + arg->len, "%02x",
1894                                 rt->rt6i_nexthop->primary_key[i]);
1895                         arg->len += 2;
1896                 }
1897         } else {
1898                 sprintf(arg->buffer + arg->len,
1899                         "00000000000000000000000000000000");
1900                 arg->len += 32;
1901         }
1902         arg->len += sprintf(arg->buffer + arg->len,
1903                             " %08x %08x %08x %08x %8s\n",
1904                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1905                             rt->u.dst.__use, rt->rt6i_flags, 
1906                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1907         return 0;
1908 }
1909
1910 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1911 {
1912         struct rt6_proc_arg arg;
1913         arg.buffer = buffer;
1914         arg.offset = offset;
1915         arg.length = length;
1916         arg.skip = 0;
1917         arg.len = 0;
1918
1919         read_lock_bh(&rt6_lock);
1920         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1921         read_unlock_bh(&rt6_lock);
1922
1923         *start = buffer;
1924         if (offset)
1925                 *start += offset % RT6_INFO_LEN;
1926
1927         arg.len -= offset % RT6_INFO_LEN;
1928
1929         if (arg.len > length)
1930                 arg.len = length;
1931         if (arg.len < 0)
1932                 arg.len = 0;
1933
1934         return arg.len;
1935 }
1936
1937 extern struct rt6_statistics rt6_stats;
1938
1939 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1940 {
1941         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1942                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1943                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1944                       rt6_stats.fib_rt_cache,
1945                       atomic_read(&ip6_dst_ops.entries),
1946                       rt6_stats.fib_discarded_routes);
1947
1948         return 0;
1949 }
1950
1951 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1952 {
1953         return single_open(file, rt6_stats_seq_show, NULL);
1954 }
1955
1956 static struct file_operations rt6_stats_seq_fops = {
1957         .owner   = THIS_MODULE,
1958         .open    = rt6_stats_seq_open,
1959         .read    = seq_read,
1960         .llseek  = seq_lseek,
1961         .release = single_release,
1962 };
1963 #endif  /* CONFIG_PROC_FS */
1964
1965 #ifdef CONFIG_SYSCTL
1966
1967 static int flush_delay;
1968
1969 static
1970 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1971                               void __user *buffer, size_t *lenp, loff_t *ppos)
1972 {
1973         if (write) {
1974                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1975                 if (flush_delay < 0)
1976                         flush_delay = 0;
1977                 fib6_run_gc((unsigned long)flush_delay);
1978                 return 0;
1979         } else
1980                 return -EINVAL;
1981 }
1982
1983 ctl_table ipv6_route_table[] = {
1984         {
1985                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
1986                 .procname       =       "flush",
1987                 .data           =       &flush_delay,
1988                 .maxlen         =       sizeof(int),
1989                 .mode           =       0644,
1990                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
1991         },
1992         {
1993                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
1994                 .procname       =       "gc_thresh",
1995                 .data           =       &ip6_dst_ops.gc_thresh,
1996                 .maxlen         =       sizeof(int),
1997                 .mode           =       0644,
1998                 .proc_handler   =       &proc_dointvec,
1999         },
2000         {
2001                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2002                 .procname       =       "max_size",
2003                 .data           =       &ip6_rt_max_size,
2004                 .maxlen         =       sizeof(int),
2005                 .mode           =       0644,
2006                 .proc_handler   =       &proc_dointvec,
2007         },
2008         {
2009                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2010                 .procname       =       "gc_min_interval",
2011                 .data           =       &ip6_rt_gc_min_interval,
2012                 .maxlen         =       sizeof(int),
2013                 .mode           =       0644,
2014                 .proc_handler   =       &proc_dointvec_jiffies,
2015                 .strategy       =       &sysctl_jiffies,
2016         },
2017         {
2018                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2019                 .procname       =       "gc_timeout",
2020                 .data           =       &ip6_rt_gc_timeout,
2021                 .maxlen         =       sizeof(int),
2022                 .mode           =       0644,
2023                 .proc_handler   =       &proc_dointvec_jiffies,
2024                 .strategy       =       &sysctl_jiffies,
2025         },
2026         {
2027                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2028                 .procname       =       "gc_interval",
2029                 .data           =       &ip6_rt_gc_interval,
2030                 .maxlen         =       sizeof(int),
2031                 .mode           =       0644,
2032                 .proc_handler   =       &proc_dointvec_jiffies,
2033                 .strategy       =       &sysctl_jiffies,
2034         },
2035         {
2036                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2037                 .procname       =       "gc_elasticity",
2038                 .data           =       &ip6_rt_gc_elasticity,
2039                 .maxlen         =       sizeof(int),
2040                 .mode           =       0644,
2041                 .proc_handler   =       &proc_dointvec_jiffies,
2042                 .strategy       =       &sysctl_jiffies,
2043         },
2044         {
2045                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2046                 .procname       =       "mtu_expires",
2047                 .data           =       &ip6_rt_mtu_expires,
2048                 .maxlen         =       sizeof(int),
2049                 .mode           =       0644,
2050                 .proc_handler   =       &proc_dointvec_jiffies,
2051                 .strategy       =       &sysctl_jiffies,
2052         },
2053         {
2054                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2055                 .procname       =       "min_adv_mss",
2056                 .data           =       &ip6_rt_min_advmss,
2057                 .maxlen         =       sizeof(int),
2058                 .mode           =       0644,
2059                 .proc_handler   =       &proc_dointvec_jiffies,
2060                 .strategy       =       &sysctl_jiffies,
2061         },
2062         { .ctl_name = 0 }
2063 };
2064
2065 #endif
2066
2067 void __init ip6_route_init(void)
2068 {
2069         struct proc_dir_entry *p;
2070
2071         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2072                                                      sizeof(struct rt6_info),
2073                                                      0, SLAB_HWCACHE_ALIGN,
2074                                                      NULL, NULL);
2075         if (!ip6_dst_ops.kmem_cachep)
2076                 panic("cannot create ip6_dst_cache");
2077
2078         fib6_init();
2079 #ifdef  CONFIG_PROC_FS
2080         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2081         if (p)
2082                 p->owner = THIS_MODULE;
2083
2084         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2085 #endif
2086 #ifdef CONFIG_XFRM
2087         xfrm6_init();
2088 #endif
2089 }
2090
2091 void __exit ip6_route_cleanup(void)
2092 {
2093 #ifdef CONFIG_PROC_FS
2094         proc_net_remove("ipv6_route");
2095         proc_net_remove("rt6_stats");
2096 #endif
2097 #ifdef CONFIG_XFRM
2098         xfrm6_fini();
2099 #endif
2100         rt6_ifdown(NULL);
2101         fib6_gc_cleanup();
2102         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2103 }