VServer 1.9.2 (patch-2.6.8.1-vs1.9.2.diff)
[linux-2.6.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
40
41 #ifdef  CONFIG_PROC_FS
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #endif
45
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56
57 #include <asm/uaccess.h>
58
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62
63 /* Set to 3 to get tracing. */
/*
 * Compile-time debug level for this file.  Below 3 both RDBG() and
 * RT6_TRACE() expand to nothing; at 3 or above they expand to printk().
 */
64 #define RT6_DEBUG 2
65
66 #if RT6_DEBUG >= 3
/* RDBG's argument is pasted directly after "printk", so it must be a
 * complete parenthesized argument list: RDBG(("fmt", args)). */
67 #define RDBG(x) printk x
/* RT6_TRACE prefixes its format string with KERN_DEBUG. */
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
69 #else
70 #define RDBG(x)
/* Empty statement so that "RT6_TRACE(...);" stays a single statement. */
71 #define RT6_TRACE(x...) do { ; } while (0)
72 #endif
73
74
/*
 * Route cache tunables.  Values written as multiples of HZ are in jiffies.
 */
/* ip6_dst_gc() returns non-zero while the entry count exceeds this. */
75 static int ip6_rt_max_size = 4096;
/* ip6_dst_gc() skips a run that comes sooner than this after the last one,
 * unless the entry count is already above ip6_rt_max_size. */
76 static int ip6_rt_gc_min_interval = HZ / 2;
/* Half of this value is what ip6_dst_gc() resets its expiry to once the
 * entry count drops below gc_thresh. */
77 static int ip6_rt_gc_timeout = 60*HZ;
/* Not static; not referenced in this part of the file. */
78 int ip6_rt_gc_interval = 30*HZ;
/* Shift count used by ip6_dst_gc() to decay its expiry on every call. */
79 static int ip6_rt_gc_elasticity = 9;
/* Not referenced in this part of the file. */
80 static int ip6_rt_mtu_expires = 10*60*HZ;
/* Lower bound applied by ipv6_advmss().
 * NOTE(review): 20 and 40 presumably stand for the TCP and IPv6 header
 * sizes - confirm. */
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82
/* Forward declarations: route copy helper and the dst_ops callbacks that
 * ip6_dst_ops below refers to. */
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void             ip6_dst_destroy(struct dst_entry *);
87 static void             ip6_dst_ifdown(struct dst_entry *, int how);
88 static int               ip6_dst_gc(void);
89
/* Handlers installed on reject routes and on ip6_null_entry. */
90 static int              ip6_pkt_discard(struct sk_buff *skb);
91 static int              ip6_pkt_discard_out(struct sk_buff **pskb);
92 static void             ip6_link_failure(struct sk_buff *skb);
93 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
94
/*
 * dst_ops for IPv6 routes.  ip6_dst_alloc() passes this table to
 * dst_alloc(); each callback is implemented in this file.  entry_size is
 * sizeof(struct rt6_info).
 */
95 static struct dst_ops ip6_dst_ops = {
96         .family                 =       AF_INET6,
97         .protocol               =       __constant_htons(ETH_P_IPV6),
98         .gc                     =       ip6_dst_gc,
/* ip6_dst_gc() compares the live entry count against this value. */
99         .gc_thresh              =       1024,
100         .check                  =       ip6_dst_check,
101         .destroy                =       ip6_dst_destroy,
102         .ifdown                 =       ip6_dst_ifdown,
103         .negative_advice        =       ip6_negative_advice,
104         .link_failure           =       ip6_link_failure,
105         .update_pmtu            =       ip6_rt_update_pmtu,
106         .entry_size             =       sizeof(struct rt6_info),
107 };
108
/*
 * Statically allocated "no route" entry.  The lookup helpers in this file
 * return it when nothing matches (rt6_device_match() in strict mode,
 * rt6_best_dflt() as its final fallback, rt6_cow() when cloning fails).
 * It is a reject route: dst.error is -ENETUNREACH and both input and
 * output point at the discard handlers.
 */
109 struct rt6_info ip6_null_entry = {
110         .u = {
111                 .dst = {
/* Starts with one reference so it is never treated as unused. */
112                         .__refcnt       = ATOMIC_INIT(1),
113                         .__use          = 1,
114                         .dev            = &loopback_dev,
115                         .obsolete       = -1,
116                         .error          = -ENETUNREACH,
117                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
118                         .input          = ip6_pkt_discard,
119                         .output         = ip6_pkt_discard_out,
120                         .ops            = &ip6_dst_ops,
/* path points back at the entry itself. */
121                         .path           = (struct dst_entry*)&ip6_null_entry,
122                 }
123         },
124         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
/* Largest possible metric value. */
125         .rt6i_metric    = ~(u32) 0,
126         .rt6i_ref       = ATOMIC_INIT(1),
127 };
128
/*
 * Root node of the IPv6 FIB tree.  Its leaf is ip6_null_entry, so a
 * lookup that ends at the root yields the reject route.
 */
129 struct fib6_node ip6_routing_table = {
130         .leaf           = &ip6_null_entry,
131         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
132 };
133
134 /* Protects all the ip6 fib */
135
/* Lookups in this file take it with read_lock_bh(); rt6_ins(),
 * ip6_del_rt() and ndisc_dst_alloc() take it with write_lock_bh(). */
136 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
137
138
139 /* allocate dst with ip6_dst_ops */
140 static __inline__ struct rt6_info *ip6_dst_alloc(void)
141 {
142         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
143 }
144
145 static void ip6_dst_destroy(struct dst_entry *dst)
146 {
147         struct rt6_info *rt = (struct rt6_info *)dst;
148         struct inet6_dev *idev = rt->rt6i_idev;
149
150         if (idev != NULL) {
151                 rt->rt6i_idev = NULL;
152                 in6_dev_put(idev);
153         }       
154 }
155
/*
 * dst_ops.ifdown callback.  Does the same work as ip6_dst_destroy(): it
 * releases the route's inet6_dev reference.  The "how" argument is unused.
 */
156 static void ip6_dst_ifdown(struct dst_entry *dst, int how)
157 {
158         ip6_dst_destroy(dst);
159 }
160
161 /*
162  *      Route lookup. Any rt6_lock is implied.
163  */
164
165 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
166                                                     int oif,
167                                                     int strict)
168 {
169         struct rt6_info *local = NULL;
170         struct rt6_info *sprt;
171
172         if (oif) {
173                 for (sprt = rt; sprt; sprt = sprt->u.next) {
174                         struct net_device *dev = sprt->rt6i_dev;
175                         if (dev->ifindex == oif)
176                                 return sprt;
177                         if (dev->flags&IFF_LOOPBACK)
178                                 local = sprt;
179                 }
180
181                 if (local)
182                         return local;
183
184                 if (strict)
185                         return &ip6_null_entry;
186         }
187         return rt;
188 }
189
190 /*
191  *      pointer to the last default router chosen. BH is disabled locally.
192  */
/* Written by rt6_best_dflt() (records its choice) and reset to NULL by
 * ip6_del_rt(); both do so while holding rt6_dflt_lock. */
193 static struct rt6_info *rt6_dflt_pointer;
194 static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
195
196 /* Default Router Selection (RFC 2461 6.3.6) */
/*
 * Choose a default router from the route chain starting at @rt.
 *
 * @rt:  head of the chain of candidate default routes
 * @oif: required outgoing interface index, or 0 for "any"
 *
 * Three stages:
 *  1. Score every route that has a neighbour entry in a usable state and
 *     keep the highest score.
 *  2. If none scored, round-robin starting after the previously chosen
 *     router (rt6_dflt_pointer).
 *  3. If still nothing, scan the root leaf chain for an RTF_DEFAULT route
 *     on @oif, and finally fall back to ip6_null_entry.
 *
 * The result of stages 1 and 2 is remembered in rt6_dflt_pointer; stage 3
 * results are not.  Never returns NULL.  The only caller in this file,
 * ip6_route_output(), invokes it with rt6_lock read-held.
 */
197 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
198 {
199         struct rt6_info *match = NULL;
200         struct rt6_info *sprt;
201         int mpri = 0;
202
203         for (sprt = rt; sprt; sprt = sprt->u.next) {
204                 struct neighbour *neigh;
205                 int m = 0;
206
            /* +8: no interface requested, or the route is on @oif. */
207                 if (!oif ||
208                     (sprt->rt6i_dev &&
209                      sprt->rt6i_dev->ifindex == oif))
210                         m += 8;
211
            /* +4: this is the router chosen last time. */
212                 if (sprt == rt6_dflt_pointer)
213                         m += 4;
214
            /* +3/+2/+1 by neighbour state; routes with no neighbour or
             * a neighbour in any other state are skipped entirely. */
215                 if ((neigh = sprt->rt6i_nexthop) != NULL) {
216                         read_lock_bh(&neigh->lock);
217                         switch (neigh->nud_state) {
218                         case NUD_REACHABLE:
219                                 m += 3;
220                                 break;
221
222                         case NUD_STALE:
223                         case NUD_DELAY:
224                         case NUD_PROBE:
225                                 m += 2;
226                                 break;
227
228                         case NUD_NOARP:
229                         case NUD_PERMANENT:
230                                 m += 1;
231                                 break;
232
233                         case NUD_INCOMPLETE:
234                         default:
235                                 read_unlock_bh(&neigh->lock);
236                                 continue;
237                         }
238                         read_unlock_bh(&neigh->lock);
239                 } else {
240                         continue;
241                 }
242
            /* m >= 12 needs both the +8 and the +4 bonus, i.e. the
             * previous choice on an acceptable interface with a usable
             * neighbour: take it and stop searching. */
243                 if (m > mpri || m >= 12) {
244                         match = sprt;
245                         mpri = m;
246                         if (m >= 12) {
247                                 /* we choose the last default router if it
248                                  * is in (probably) reachable state.
249                                  * If route changed, we should do pmtu
250                                  * discovery. --yoshfuji
251                                  */
252                                 break;
253                         }
254                 }
255         }
256
257         spin_lock(&rt6_dflt_lock);
258         if (!match) {
259                 /*
260                  *      No default routers are known to be reachable.
261                  *      SHOULD round robin
262                  */
263                 if (rt6_dflt_pointer) {
                    /* First pass: entries after the previous choice. */
264                         for (sprt = rt6_dflt_pointer->u.next;
265                              sprt; sprt = sprt->u.next) {
266                                 if (sprt->u.dst.obsolete <= 0 &&
267                                     sprt->u.dst.error == 0) {
268                                         match = sprt;
269                                         break;
270                                 }
271                         }
                    /* Second pass: wrap around from the head of the
                     * chain, up to and including the previous choice. */
272                         for (sprt = rt;
273                              !match && sprt;
274                              sprt = sprt->u.next) {
275                                 if (sprt->u.dst.obsolete <= 0 &&
276                                     sprt->u.dst.error == 0) {
277                                         match = sprt;
278                                         break;
279                                 }
280                                 if (sprt == rt6_dflt_pointer)
281                                         break;
282                         }
283                 }
284         }
285
        /* Remember the selection for the next call. */
286         if (match) {
287                 if (rt6_dflt_pointer != match)
288                         RT6_TRACE("changed default router: %p->%p\n",
289                                   rt6_dflt_pointer, match);
290                 rt6_dflt_pointer = match;
291         }
292         spin_unlock(&rt6_dflt_lock);
293
294         if (!match) {
295                 /*
296                  * Last Resort: if no default routers found, 
297                  * use addrconf default route.
298                  * We don't record this route.
299                  */
300                 for (sprt = ip6_routing_table.leaf;
301                      sprt; sprt = sprt->u.next) {
302                         if ((sprt->rt6i_flags & RTF_DEFAULT) &&
303                             (!oif ||
304                              (sprt->rt6i_dev &&
305                               sprt->rt6i_dev->ifindex == oif))) {
306                                 match = sprt;
307                                 break;
308                         }
309                 }
310                 if (!match) {
311                         /* no default route.  give up. */
312                         match = &ip6_null_entry;
313                 }
314         }
315
316         return match;
317 }
318
319 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
320                             int oif, int strict)
321 {
322         struct fib6_node *fn;
323         struct rt6_info *rt;
324
325         read_lock_bh(&rt6_lock);
326         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
327         rt = rt6_device_match(fn->leaf, oif, strict);
328         dst_hold(&rt->u.dst);
329         rt->u.dst.__use++;
330         read_unlock_bh(&rt6_lock);
331
332         rt->u.dst.lastuse = jiffies;
333         if (rt->u.dst.error == 0)
334                 return rt;
335         dst_release(&rt->u.dst);
336         return NULL;
337 }
338
339 /* rt6_ins is called with FREE rt6_lock.
340    It takes new route entry, the addition fails by any reason the
341    route is freed. In any case, if caller does not hold it, it may
342    be destroyed.
343  */
344
345 static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
346 {
347         int err;
348
349         write_lock_bh(&rt6_lock);
350         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
351         write_unlock_bh(&rt6_lock);
352
353         return err;
354 }
355
356 /* No rt6_lock! If COW failed, the function returns dead route entry
357    with dst->error set to errno value.
358  */
359
360 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
361                                 struct in6_addr *saddr)
362 {
363         int err;
364         struct rt6_info *rt;
365
366         /*
367          *      Clone the route.
368          */
369
370         rt = ip6_rt_copy(ort);
371
372         if (rt) {
373                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
374
375                 if (!(rt->rt6i_flags&RTF_GATEWAY))
376                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
377
378                 rt->rt6i_dst.plen = 128;
379                 rt->rt6i_flags |= RTF_CACHE;
380                 rt->u.dst.flags |= DST_HOST;
381
382 #ifdef CONFIG_IPV6_SUBTREES
383                 if (rt->rt6i_src.plen && saddr) {
384                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
385                         rt->rt6i_src.plen = 128;
386                 }
387 #endif
388
389                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
390
391                 dst_hold(&rt->u.dst);
392
393                 err = rt6_ins(rt, NULL, NULL);
394                 if (err == 0)
395                         return rt;
396
397                 rt->u.dst.error = err;
398
399                 return rt;
400         }
401         dst_hold(&ip6_null_entry.u.dst);
402         return &ip6_null_entry;
403 }
404
/*
 * BACKTRACK() - retry a strict lookup higher up the FIB tree.
 *
 * Expands inside ip6_route_input()/ip6_route_output() and relies on their
 * locals "rt", "fn", "strict" and their labels "out" and "restart".
 * When the device match produced ip6_null_entry in strict mode, it walks
 * fn->parent upwards:
 *  - on reaching a node flagged RTN_ROOT it takes a reference on rt
 *    (still ip6_null_entry) and jumps to "out";
 *  - on reaching a node that carries route info (RTN_RTINFO) it jumps to
 *    "restart" to try that node's leaf chain.
 * Otherwise execution falls through to the statement after the macro.
 * Note: the expansion is a bare "if" block, not wrapped in do/while(0).
 */
405 #define BACKTRACK() \
406 if (rt == &ip6_null_entry && strict) { \
407        while ((fn = fn->parent) != NULL) { \
408                 if (fn->fn_flags & RTN_ROOT) { \
409                         dst_hold(&rt->u.dst); \
410                         goto out; \
411                 } \
412                 if (fn->fn_flags & RTN_RTINFO) \
413                         goto restart; \
414         } \
415 }
416
417
/*
 * Resolve the route for a received packet and attach it to skb->dst.
 *
 * The lookup uses the packet's destination and source addresses and the
 * receiving device's ifindex.  "strict" is non-zero when the destination
 * is multicast or link-local.  The route stored in skb->dst always
 * carries a reference (taken here, in BACKTRACK(), or inside rt6_cow()).
 *
 * If the selected route has no nexthop neighbour and is not flagged
 * RTF_NONEXTHOP, rt6_lock is dropped and a host cache route is cloned
 * with rt6_cow().  An -EEXIST result means another context inserted the
 * same route meanwhile, so the lookup is retried, up to "attempts" times.
 */
418 void ip6_route_input(struct sk_buff *skb)
419 {
420         struct fib6_node *fn;
421         struct rt6_info *rt;
422         int strict;
423         int attempts = 3;
424
425         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
426
427 relookup:
428         read_lock_bh(&rt6_lock);
429
430         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
431                          &skb->nh.ipv6h->saddr);
432
    /* BACKTRACK() jumps here after moving fn to an ancestor node. */
433 restart:
434         rt = fn->leaf;
435
        /* The leaf chain holds cache routes: match on device and use it. */
436         if ((rt->rt6i_flags & RTF_CACHE)) {
437                 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
438                 BACKTRACK();
439                 dst_hold(&rt->u.dst);
440                 goto out;
441         }
442
        /* Non-cache routes are matched non-strictly (strict argument 0);
         * BACKTRACK() still tests the local "strict". */
443         rt = rt6_device_match(rt, skb->dev->ifindex, 0);
444         BACKTRACK();
445
446         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
            /* rt6_cow() needs rt6_lock released. */
447                 read_unlock_bh(&rt6_lock);
448
449                 rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
450                              &skb->nh.ipv6h->saddr);
451                         
            /* Any result other than -EEXIST (including success) is
             * final; so is running out of retries. */
452                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
453                         goto out2;
454                 /* Race condition! In the gap, when rt6_lock was
455                    released someone could insert this route.  Relookup.
456                 */
457                 dst_release(&rt->u.dst);
458                 goto relookup;
459         }
460         dst_hold(&rt->u.dst);
461
    /* out: rt6_lock still held.  out2: lock already released. */
462 out:
463         read_unlock_bh(&rt6_lock);
464 out2:
465         rt->u.dst.lastuse = jiffies;
466         rt->u.dst.__use++;
467         skb->dst = (struct dst_entry *) rt;
468 }
469
/*
 * Resolve the output route for flow @fl.
 *
 * @sk is not used in this function.  The lookup uses fl->fl6_dst,
 * fl->fl6_src and fl->oif; "strict" is non-zero when the destination is
 * multicast or link-local.
 *
 * Returns a dst_entry with a reference held; never NULL.  The entry may
 * carry an error (ip6_null_entry, or a clone whose insertion failed).
 *
 * Selection:
 *  - a leaf chain of cache routes is matched by device and used as is;
 *  - an RTF_DEFAULT route whose metric is at least IP6_RT_PRIO_ADDRCONF
 *    goes through default router selection (rt6_best_dflt());
 *  - any other route is matched by device with the strict flag.
 * A route without a nexthop neighbour (and not RTF_NONEXTHOP) is then
 * cloned with rt6_cow() outside the lock, retrying the whole lookup on
 * -EEXIST up to "attempts" times.
 */
470 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
471 {
472         struct fib6_node *fn;
473         struct rt6_info *rt;
474         int strict;
475         int attempts = 3;
476
477         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
478
479 relookup:
480         read_lock_bh(&rt6_lock);
481
482         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
483
    /* BACKTRACK() jumps here after moving fn to an ancestor node. */
484 restart:
485         rt = fn->leaf;
486
487         if ((rt->rt6i_flags & RTF_CACHE)) {
488                 rt = rt6_device_match(rt, fl->oif, strict);
489                 BACKTRACK();
490                 dst_hold(&rt->u.dst);
491                 goto out;
492         }
493         if (rt->rt6i_flags & RTF_DEFAULT) {
            /* Default routes with a lower metric are used as found. */
494                 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
495                         rt = rt6_best_dflt(rt, fl->oif);
496         } else {
497                 rt = rt6_device_match(rt, fl->oif, strict);
498                 BACKTRACK();
499         }
500
501         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
            /* rt6_cow() needs rt6_lock released. */
502                 read_unlock_bh(&rt6_lock);
503
504                 rt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
505
506                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
507                         goto out2;
508
509                 /* Race condition! In the gap, when rt6_lock was
510                    released someone could insert this route.  Relookup.
511                 */
512                 dst_release(&rt->u.dst);
513                 goto relookup;
514         }
515         dst_hold(&rt->u.dst);
516
    /* out: rt6_lock still held.  out2: lock already released. */
517 out:
518         read_unlock_bh(&rt6_lock);
519 out2:
520         rt->u.dst.lastuse = jiffies;
521         rt->u.dst.__use++;
522         return &rt->u.dst;
523 }
524
525
526 /*
527  *      Destination cache support functions
528  */
529
530 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
531 {
532         struct rt6_info *rt;
533
534         rt = (struct rt6_info *) dst;
535
536         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
537                 return dst;
538
539         dst_release(dst);
540         return NULL;
541 }
542
543 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
544 {
545         struct rt6_info *rt = (struct rt6_info *) dst;
546
547         if (rt) {
548                 if (rt->rt6i_flags & RTF_CACHE)
549                         ip6_del_rt(rt, NULL, NULL);
550                 else
551                         dst_release(dst);
552         }
553         return NULL;
554 }
555
556 static void ip6_link_failure(struct sk_buff *skb)
557 {
558         struct rt6_info *rt;
559
560         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
561
562         rt = (struct rt6_info *) skb->dst;
563         if (rt) {
564                 if (rt->rt6i_flags&RTF_CACHE) {
565                         dst_set_expires(&rt->u.dst, 0);
566                         rt->rt6i_flags |= RTF_EXPIRES;
567                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
568                         rt->rt6i_node->fn_sernum = -1;
569         }
570 }
571
572 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
573 {
574         struct rt6_info *rt6 = (struct rt6_info*)dst;
575
576         if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
577                 rt6->rt6i_flags |= RTF_MODIFIED;
578                 if (mtu < IPV6_MIN_MTU)
579                         mtu = IPV6_MIN_MTU;
580                 dst->metrics[RTAX_MTU-1] = mtu;
581         }
582 }
583
584 /* Protected by rt6_lock.  */
/* Singly linked list (via dst->next) of entries created by
 * ndisc_dst_alloc(); ndisc_dst_gc() unlinks and frees the ones whose
 * reference count has dropped to zero. */
585 static struct dst_entry *ndisc_dst_gc_list;
/* Forward declaration; defined further down in this file. */
586 static int ipv6_get_mtu(struct net_device *dev);
587
588 static inline unsigned int ipv6_advmss(unsigned int mtu)
589 {
590         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
591
592         if (mtu < ip6_rt_min_advmss)
593                 mtu = ip6_rt_min_advmss;
594
595         /*
596          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
597          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
598          * IPV6_MAXPLEN is also valid and means: "any MSS, 
599          * rely only on pmtu discovery"
600          */
601         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
602                 mtu = IPV6_MAXPLEN;
603         return mtu;
604 }
605
606 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
607                                   struct neighbour *neigh,
608                                   struct in6_addr *addr,
609                                   int (*output)(struct sk_buff **))
610 {
611         struct rt6_info *rt = ip6_dst_alloc();
612
613         if (unlikely(rt == NULL))
614                 goto out;
615
616         dev_hold(dev);
617         if (neigh)
618                 neigh_hold(neigh);
619         else
620                 neigh = ndisc_get_neigh(dev, addr);
621
622         rt->rt6i_dev      = dev;
623         rt->rt6i_idev     = in6_dev_get(dev);
624         rt->rt6i_nexthop  = neigh;
625         atomic_set(&rt->u.dst.__refcnt, 1);
626         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
627         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
628         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
629         rt->u.dst.output  = output;
630
631 #if 0   /* there's no chance to use these for ndisc */
632         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
633                                 ? DST_HOST 
634                                 : 0;
635         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
636         rt->rt6i_dst.plen = 128;
637 #endif
638
639         write_lock_bh(&rt6_lock);
640         rt->u.dst.next = ndisc_dst_gc_list;
641         ndisc_dst_gc_list = &rt->u.dst;
642         write_unlock_bh(&rt6_lock);
643
644         fib6_force_start_gc();
645
646 out:
647         return (struct dst_entry *)rt;
648 }
649
650 int ndisc_dst_gc(int *more)
651 {
652         struct dst_entry *dst, *next, **pprev;
653         int freed;
654
655         next = NULL;
656         pprev = &ndisc_dst_gc_list;
657         freed = 0;
658         while ((dst = *pprev) != NULL) {
659                 if (!atomic_read(&dst->__refcnt)) {
660                         *pprev = dst->next;
661                         dst_free(dst);
662                         freed++;
663                 } else {
664                         pprev = &dst->next;
665                         (*more)++;
666                 }
667         }
668
669         return freed;
670 }
671
672 static int ip6_dst_gc(void)
673 {
674         static unsigned expire = 30*HZ;
675         static unsigned long last_gc;
676         unsigned long now = jiffies;
677
678         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
679             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
680                 goto out;
681
682         expire++;
683         fib6_run_gc(expire);
684         last_gc = now;
685         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
686                 expire = ip6_rt_gc_timeout>>1;
687
688 out:
689         expire -= expire>>ip6_rt_gc_elasticity;
690         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
691 }
692
693 /* Clean host part of a prefix. Not necessary in radix tree,
694    but results in cleaner routing tables.
695
696    Remove it only when all the things will work!
697  */
698
699 static int ipv6_get_mtu(struct net_device *dev)
700 {
701         int mtu = IPV6_MIN_MTU;
702         struct inet6_dev *idev;
703
704         idev = in6_dev_get(dev);
705         if (idev) {
706                 mtu = idev->cnf.mtu6;
707                 in6_dev_put(idev);
708         }
709         return mtu;
710 }
711
712 static int ipv6_get_hoplimit(struct net_device *dev)
713 {
714         int hoplimit = ipv6_devconf.hop_limit;
715         struct inet6_dev *idev;
716
717         idev = in6_dev_get(dev);
718         if (idev) {
719                 hoplimit = idev->cnf.hop_limit;
720                 in6_dev_put(idev);
721         }
722         return hoplimit;
723 }
724
725 /*
726  *
727  */
728
729 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
730 {
731         int err;
732         struct rtmsg *r;
733         struct rtattr **rta;
734         struct rt6_info *rt;
735         struct net_device *dev = NULL;
736         int addr_type;
737
738         rta = (struct rtattr **) _rtattr;
739
740         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
741                 return -EINVAL;
742 #ifndef CONFIG_IPV6_SUBTREES
743         if (rtmsg->rtmsg_src_len)
744                 return -EINVAL;
745 #endif
746         if (rtmsg->rtmsg_ifindex) {
747                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
748                 if (!dev)
749                         return -ENODEV;
750         }
751
752         if (rtmsg->rtmsg_metric == 0)
753                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
754
755         rt = ip6_dst_alloc();
756
757         if (rt == NULL)
758                 return -ENOMEM;
759
760         rt->u.dst.obsolete = -1;
761         rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
762         if (nlh && (r = NLMSG_DATA(nlh))) {
763                 rt->rt6i_protocol = r->rtm_protocol;
764         } else {
765                 rt->rt6i_protocol = RTPROT_BOOT;
766         }
767
768         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
769
770         if (addr_type & IPV6_ADDR_MULTICAST)
771                 rt->u.dst.input = ip6_mc_input;
772         else
773                 rt->u.dst.input = ip6_forward;
774
775         rt->u.dst.output = ip6_output;
776
777         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
778                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
779         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
780         if (rt->rt6i_dst.plen == 128)
781                rt->u.dst.flags = DST_HOST;
782
783 #ifdef CONFIG_IPV6_SUBTREES
784         ipv6_addr_prefix(&rt->rt6i_src.addr, 
785                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
786         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
787 #endif
788
789         rt->rt6i_metric = rtmsg->rtmsg_metric;
790
791         /* We cannot add true routes via loopback here,
792            they would result in kernel looping; promote them to reject routes
793          */
794         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
795             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
796                 if (dev)
797                         dev_put(dev);
798                 dev = &loopback_dev;
799                 dev_hold(dev);
800                 rt->u.dst.output = ip6_pkt_discard_out;
801                 rt->u.dst.input = ip6_pkt_discard;
802                 rt->u.dst.error = -ENETUNREACH;
803                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
804                 goto install_route;
805         }
806
807         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
808                 struct in6_addr *gw_addr;
809                 int gwa_type;
810
811                 gw_addr = &rtmsg->rtmsg_gateway;
812                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
813                 gwa_type = ipv6_addr_type(gw_addr);
814
815                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
816                         struct rt6_info *grt;
817
818                         /* IPv6 strictly inhibits using not link-local
819                            addresses as nexthop address.
820                            Otherwise, router will not able to send redirects.
821                            It is very good, but in some (rare!) circumstances
822                            (SIT, PtP, NBMA NOARP links) it is handy to allow
823                            some exceptions. --ANK
824                          */
825                         err = -EINVAL;
826                         if (!(gwa_type&IPV6_ADDR_UNICAST))
827                                 goto out;
828
829                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
830
831                         err = -EHOSTUNREACH;
832                         if (grt == NULL)
833                                 goto out;
834                         if (dev) {
835                                 if (dev != grt->rt6i_dev) {
836                                         dst_release(&grt->u.dst);
837                                         goto out;
838                                 }
839                         } else {
840                                 dev = grt->rt6i_dev;
841                                 dev_hold(dev);
842                         }
843                         if (!(grt->rt6i_flags&RTF_GATEWAY))
844                                 err = 0;
845                         dst_release(&grt->u.dst);
846
847                         if (err)
848                                 goto out;
849                 }
850                 err = -EINVAL;
851                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
852                         goto out;
853         }
854
855         err = -ENODEV;
856         if (dev == NULL)
857                 goto out;
858
859         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
860                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
861                 if (IS_ERR(rt->rt6i_nexthop)) {
862                         err = PTR_ERR(rt->rt6i_nexthop);
863                         rt->rt6i_nexthop = NULL;
864                         goto out;
865                 }
866         }
867
868         rt->rt6i_flags = rtmsg->rtmsg_flags;
869
870 install_route:
871         if (rta && rta[RTA_METRICS-1]) {
872                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
873                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
874
875                 while (RTA_OK(attr, attrlen)) {
876                         unsigned flavor = attr->rta_type;
877                         if (flavor) {
878                                 if (flavor > RTAX_MAX) {
879                                         err = -EINVAL;
880                                         goto out;
881                                 }
882                                 rt->u.dst.metrics[flavor-1] =
883                                         *(u32 *)RTA_DATA(attr);
884                         }
885                         attr = RTA_NEXT(attr, attrlen);
886                 }
887         }
888
889         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
890                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
891                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
892                                 IPV6_DEFAULT_MCASTHOPS;
893                 else
894                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
895                                 ipv6_get_hoplimit(dev);
896         }
897
898         if (!rt->u.dst.metrics[RTAX_MTU-1])
899                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
900         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
901                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
902         rt->u.dst.dev = dev;
903         rt->rt6i_idev = in6_dev_get(dev);
904         return rt6_ins(rt, nlh, _rtattr);
905
906 out:
907         if (dev)
908                 dev_put(dev);
909         dst_free((struct dst_entry *) rt);
910         return err;
911 }
912
913 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
914 {
915         int err;
916
917         write_lock_bh(&rt6_lock);
918
919         spin_lock_bh(&rt6_dflt_lock);
920         rt6_dflt_pointer = NULL;
921         spin_unlock_bh(&rt6_dflt_lock);
922
923         dst_release(&rt->u.dst);
924
925         err = fib6_del(rt, nlh, _rtattr);
926         write_unlock_bh(&rt6_lock);
927
928         return err;
929 }
930
931 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
932 {
933         struct fib6_node *fn;
934         struct rt6_info *rt;
935         int err = -ESRCH;
936
937         read_lock_bh(&rt6_lock);
938
939         fn = fib6_locate(&ip6_routing_table,
940                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
941                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
942         
943         if (fn) {
944                 for (rt = fn->leaf; rt; rt = rt->u.next) {
945                         if (rtmsg->rtmsg_ifindex &&
946                             (rt->rt6i_dev == NULL ||
947                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
948                                 continue;
949                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
950                             ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
951                                 continue;
952                         if (rtmsg->rtmsg_metric &&
953                             rtmsg->rtmsg_metric != rt->rt6i_metric)
954                                 continue;
955                         dst_hold(&rt->u.dst);
956                         read_unlock_bh(&rt6_lock);
957
958                         return ip6_del_rt(rt, nlh, _rtattr);
959                 }
960         }
961         read_unlock_bh(&rt6_lock);
962
963         return err;
964 }
965
966 /*
967  *      Handle redirects
968  */
969 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
970                   struct neighbour *neigh, int on_link)
971 {
972         struct rt6_info *rt, *nrt;
973
974         /* Locate old route to this destination. */
975         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
976
977         if (rt == NULL)
978                 return;
979
980         if (neigh->dev != rt->rt6i_dev)
981                 goto out;
982
983         /* Redirect received -> path was valid.
984            Look, redirects are sent only in response to data packets,
985            so that this nexthop apparently is reachable. --ANK
986          */
987         dst_confirm(&rt->u.dst);
988
989         /* Duplicate redirect: silently ignore. */
990         if (neigh == rt->u.dst.neighbour)
991                 goto out;
992
993         /* Current route is on-link; redirect is always invalid.
994            
995            Seems, previous statement is not true. It could
996            be node, which looks for us as on-link (f.e. proxy ndisc)
997            But then router serving it might decide, that we should
998            know truth 8)8) --ANK (980726).
999          */
1000         if (!(rt->rt6i_flags&RTF_GATEWAY))
1001                 goto out;
1002
1003         /*
1004          *      RFC 2461 specifies that redirects should only be
1005          *      accepted if they come from the nexthop to the target.
1006          *      Due to the way default routers are chosen, this notion
1007          *      is a bit fuzzy and one might need to check all default
1008          *      routers.
1009          */
1010
1011         if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
1012                 if (rt->rt6i_flags & RTF_DEFAULT) {
1013                         struct rt6_info *rt1;
1014
1015                         read_lock(&rt6_lock);
1016                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1017                                 if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
1018                                         dst_hold(&rt1->u.dst);
1019                                         dst_release(&rt->u.dst);
1020                                         read_unlock(&rt6_lock);
1021                                         rt = rt1;
1022                                         goto source_ok;
1023                                 }
1024                         }
1025                         read_unlock(&rt6_lock);
1026                 }
1027                 if (net_ratelimit())
1028                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1029                                "for redirect target\n");
1030                 goto out;
1031         }
1032
1033 source_ok:
1034
1035         /*
1036          *      We have finally decided to accept it.
1037          */
1038
1039         nrt = ip6_rt_copy(rt);
1040         if (nrt == NULL)
1041                 goto out;
1042
1043         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1044         if (on_link)
1045                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1046
1047         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1048         nrt->rt6i_dst.plen = 128;
1049         nrt->u.dst.flags |= DST_HOST;
1050
1051         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1052         nrt->rt6i_nexthop = neigh_clone(neigh);
1053         /* Reset pmtu, it may be better */
1054         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1055         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
1056
1057         if (rt6_ins(nrt, NULL, NULL))
1058                 goto out;
1059
1060         if (rt->rt6i_flags&RTF_CACHE) {
1061                 ip6_del_rt(rt, NULL, NULL);
1062                 return;
1063         }
1064
1065 out:
1066         dst_release(&rt->u.dst);
1067         return;
1068 }
1069
1070 /*
1071  *      Handle ICMP "packet too big" messages
1072  *      i.e. Path MTU discovery
1073  */
1074
1075 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1076                         struct net_device *dev, u32 pmtu)
1077 {
1078         struct rt6_info *rt, *nrt;
1079
1080         if (pmtu < IPV6_MIN_MTU) {
1081                 if (net_ratelimit())
1082                         printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1083                                pmtu);
1084                 /* According to RFC1981, the PMTU is set to the IPv6 minimum
1085                    link MTU if the node receives a Packet Too Big message
1086                    reporting next-hop MTU that is less than the IPv6 minimum MTU.
1087                    */
1088                 pmtu = IPV6_MIN_MTU;
1089         }
1090
1091         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1092
1093         if (rt == NULL)
1094                 return;
1095
1096         if (pmtu >= dst_pmtu(&rt->u.dst))
1097                 goto out;
1098
1099         /* New mtu received -> path was valid.
1100            They are sent only in response to data packets,
1101            so that this nexthop apparently is reachable. --ANK
1102          */
1103         dst_confirm(&rt->u.dst);
1104
1105         /* Host route. If it is static, it would be better
1106            not to override it, but add new one, so that
1107            when cache entry will expire old pmtu
1108            would return automatically.
1109          */
1110         if (rt->rt6i_flags & RTF_CACHE) {
1111                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1112                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1113                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1114                 goto out;
1115         }
1116
1117         /* Network route.
1118            Two cases are possible:
1119            1. It is connected route. Action: COW
1120            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1121          */
1122         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1123                 nrt = rt6_cow(rt, daddr, saddr);
1124                 if (!nrt->u.dst.error) {
1125                         nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1126                         /* According to RFC 1981, detecting PMTU increase shouldn't be
1127                            happened within 5 mins, the recommended timer is 10 mins.
1128                            Here this route expiration time is set to ip6_rt_mtu_expires
1129                            which is 10 mins. After 10 mins the decreased pmtu is expired
1130                            and detecting PMTU increase will be automatically happened.
1131                          */
1132                         dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1133                         nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1134                 }
1135                 dst_release(&nrt->u.dst);
1136         } else {
1137                 nrt = ip6_rt_copy(rt);
1138                 if (nrt == NULL)
1139                         goto out;
1140                 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1141                 nrt->rt6i_dst.plen = 128;
1142                 nrt->u.dst.flags |= DST_HOST;
1143                 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1144                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1145                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1146                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1147                 rt6_ins(nrt, NULL, NULL);
1148         }
1149
1150 out:
1151         dst_release(&rt->u.dst);
1152 }
1153
1154 /*
1155  *      Misc support functions
1156  */
1157
1158 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1159 {
1160         struct rt6_info *rt = ip6_dst_alloc();
1161
1162         if (rt) {
1163                 rt->u.dst.input = ort->u.dst.input;
1164                 rt->u.dst.output = ort->u.dst.output;
1165
1166                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1167                 rt->u.dst.dev = ort->u.dst.dev;
1168                 if (rt->u.dst.dev)
1169                         dev_hold(rt->u.dst.dev);
1170                 rt->rt6i_idev = ort->rt6i_idev;
1171                 if (rt->rt6i_idev)
1172                         in6_dev_hold(rt->rt6i_idev);
1173                 rt->u.dst.lastuse = jiffies;
1174                 rt->rt6i_expires = 0;
1175
1176                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1177                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1178                 rt->rt6i_metric = 0;
1179
1180                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1181 #ifdef CONFIG_IPV6_SUBTREES
1182                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1183 #endif
1184         }
1185         return rt;
1186 }
1187
1188 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1189 {       
1190         struct rt6_info *rt;
1191         struct fib6_node *fn;
1192
1193         fn = &ip6_routing_table;
1194
1195         write_lock_bh(&rt6_lock);
1196         for (rt = fn->leaf; rt; rt=rt->u.next) {
1197                 if (dev == rt->rt6i_dev &&
1198                     ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1199                         break;
1200         }
1201         if (rt)
1202                 dst_hold(&rt->u.dst);
1203         write_unlock_bh(&rt6_lock);
1204         return rt;
1205 }
1206
1207 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1208                                      struct net_device *dev)
1209 {
1210         struct in6_rtmsg rtmsg;
1211
1212         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1213         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1214         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1215         rtmsg.rtmsg_metric = 1024;
1216         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1217
1218         rtmsg.rtmsg_ifindex = dev->ifindex;
1219
1220         ip6_route_add(&rtmsg, NULL, NULL);
1221         return rt6_get_dflt_router(gwaddr, dev);
1222 }
1223
1224 void rt6_purge_dflt_routers(int last_resort)
1225 {
1226         struct rt6_info *rt;
1227         u32 flags;
1228
1229         if (last_resort)
1230                 flags = RTF_ALLONLINK;
1231         else
1232                 flags = RTF_DEFAULT | RTF_ADDRCONF;     
1233
1234 restart:
1235         read_lock_bh(&rt6_lock);
1236         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1237                 if (rt->rt6i_flags & flags) {
1238                         dst_hold(&rt->u.dst);
1239
1240                         spin_lock_bh(&rt6_dflt_lock);
1241                         rt6_dflt_pointer = NULL;
1242                         spin_unlock_bh(&rt6_dflt_lock);
1243
1244                         read_unlock_bh(&rt6_lock);
1245
1246                         ip6_del_rt(rt, NULL, NULL);
1247
1248                         goto restart;
1249                 }
1250         }
1251         read_unlock_bh(&rt6_lock);
1252 }
1253
1254 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1255 {
1256         struct in6_rtmsg rtmsg;
1257         int err;
1258
1259         switch(cmd) {
1260         case SIOCADDRT:         /* Add a route */
1261         case SIOCDELRT:         /* Delete a route */
1262                 if (!capable(CAP_NET_ADMIN))
1263                         return -EPERM;
1264                 err = copy_from_user(&rtmsg, arg,
1265                                      sizeof(struct in6_rtmsg));
1266                 if (err)
1267                         return -EFAULT;
1268                         
1269                 rtnl_lock();
1270                 switch (cmd) {
1271                 case SIOCADDRT:
1272                         err = ip6_route_add(&rtmsg, NULL, NULL);
1273                         break;
1274                 case SIOCDELRT:
1275                         err = ip6_route_del(&rtmsg, NULL, NULL);
1276                         break;
1277                 default:
1278                         err = -EINVAL;
1279                 }
1280                 rtnl_unlock();
1281
1282                 return err;
1283         };
1284
1285         return -EINVAL;
1286 }
1287
1288 /*
1289  *      Drop the packet on the floor
1290  */
1291
1292 int ip6_pkt_discard(struct sk_buff *skb)
1293 {
1294         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1295         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1296         kfree_skb(skb);
1297         return 0;
1298 }
1299
/*
 *      Variant of ip6_pkt_discard() that takes a pointer to the skb
 *      pointer; the packet itself is handled identically.
 */
int ip6_pkt_discard_out(struct sk_buff **pskb)
{
        struct sk_buff *skb = *pskb;

        return ip6_pkt_discard(skb);
}
1304
1305 /*
1306  *      Add address
1307  */
1308
1309 int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast)
1310 {
1311         struct rt6_info *rt = ip6_dst_alloc();
1312
1313         if (rt == NULL)
1314                 return -ENOMEM;
1315
1316         dev_hold(&loopback_dev);
1317
1318         rt->u.dst.flags = DST_HOST;
1319         rt->u.dst.input = ip6_input;
1320         rt->u.dst.output = ip6_output;
1321         rt->rt6i_dev = &loopback_dev;
1322         rt->rt6i_idev = in6_dev_get(&loopback_dev);
1323         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1324         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
1325         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
1326         rt->u.dst.obsolete = -1;
1327
1328         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1329         if (!anycast)
1330                 rt->rt6i_flags |= RTF_LOCAL;
1331         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1332         if (rt->rt6i_nexthop == NULL) {
1333                 dst_free((struct dst_entry *) rt);
1334                 return -ENOMEM;
1335         }
1336
1337         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1338         rt->rt6i_dst.plen = 128;
1339         rt6_ins(rt, NULL, NULL);
1340
1341         return 0;
1342 }
1343
1344 /* Delete address. Warning: you should check that this address
1345    disappeared before calling this function.
1346  */
1347
1348 int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
1349 {
1350         struct rt6_info *rt;
1351         int err = -ENOENT;
1352
1353         rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
1354         if (rt) {
1355                 if (rt->rt6i_dst.plen == 128)
1356                         err = ip6_del_rt(rt, NULL, NULL);
1357                 else
1358                         dst_release(&rt->u.dst);
1359         }
1360
1361         return err;
1362 }
1363
1364 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1365 {
1366         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1367             rt != &ip6_null_entry) {
1368                 RT6_TRACE("deleted by ifdown %p\n", rt);
1369                 return -1;
1370         }
1371         return 0;
1372 }
1373
1374 void rt6_ifdown(struct net_device *dev)
1375 {
1376         write_lock_bh(&rt6_lock);
1377         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1378         write_unlock_bh(&rt6_lock);
1379 }
1380
/* Argument bundle passed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
        struct net_device *dev;         /* device whose MTU changed */
        unsigned mtu;                   /* its new MTU */
};
1386
1387 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1388 {
1389         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1390         struct inet6_dev *idev;
1391
1392         /* In IPv6 pmtu discovery is not optional,
1393            so that RTAX_MTU lock cannot disable it.
1394            We still use this lock to block changes
1395            caused by addrconf/ndisc.
1396         */
1397
1398         idev = __in6_dev_get(arg->dev);
1399         if (idev == NULL)
1400                 return 0;
1401
1402         /* For administrative MTU increase, there is no way to discover
1403            IPv6 PMTU increase, so PMTU increase should be updated here.
1404            Since RFC 1981 doesn't include administrative MTU increase
1405            update PMTU increase is a MUST. (i.e. jumbo frame)
1406          */
1407         /*
1408            If new MTU is less than route PMTU, this new MTU will be the
1409            lowest MTU in the path, update the route PMTU to reflect PMTU
1410            decreases; if new MTU is greater than route PMTU, and the
1411            old MTU is the lowest MTU in the path, update the route PMTU
1412            to reflect the increase. In this case if the other nodes' MTU
1413            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1414            PMTU discouvery.
1415          */
1416         if (rt->rt6i_dev == arg->dev &&
1417             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1418             (dst_pmtu(&rt->u.dst) > arg->mtu ||
1419              (dst_pmtu(&rt->u.dst) < arg->mtu &&
1420               dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
1421                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1422         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1423         return 0;
1424 }
1425
1426 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1427 {
1428         struct rt6_mtu_change_arg arg;
1429
1430         arg.dev = dev;
1431         arg.mtu = mtu;
1432         read_lock_bh(&rt6_lock);
1433         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1434         read_unlock_bh(&rt6_lock);
1435 }
1436
1437 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1438                               struct in6_rtmsg *rtmsg)
1439 {
1440         memset(rtmsg, 0, sizeof(*rtmsg));
1441
1442         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1443         rtmsg->rtmsg_src_len = r->rtm_src_len;
1444         rtmsg->rtmsg_flags = RTF_UP;
1445         if (r->rtm_type == RTN_UNREACHABLE)
1446                 rtmsg->rtmsg_flags |= RTF_REJECT;
1447
1448         if (rta[RTA_GATEWAY-1]) {
1449                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1450                         return -EINVAL;
1451                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1452                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1453         }
1454         if (rta[RTA_DST-1]) {
1455                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1456                         return -EINVAL;
1457                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1458         }
1459         if (rta[RTA_SRC-1]) {
1460                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1461                         return -EINVAL;
1462                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1463         }
1464         if (rta[RTA_OIF-1]) {
1465                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1466                         return -EINVAL;
1467                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1468         }
1469         if (rta[RTA_PRIORITY-1]) {
1470                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1471                         return -EINVAL;
1472                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1473         }
1474         return 0;
1475 }
1476
1477 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1478 {
1479         struct rtmsg *r = NLMSG_DATA(nlh);
1480         struct in6_rtmsg rtmsg;
1481
1482         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1483                 return -EINVAL;
1484         return ip6_route_del(&rtmsg, nlh, arg);
1485 }
1486
1487 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1488 {
1489         struct rtmsg *r = NLMSG_DATA(nlh);
1490         struct in6_rtmsg rtmsg;
1491
1492         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1493                 return -EINVAL;
1494         return ip6_route_add(&rtmsg, nlh, arg);
1495 }
1496
/* Per-call context handed to the dump walker through walker->args. */
struct rt6_rtnl_dump_arg {
        struct sk_buff *skb;            /* buffer the dump is written into */
        struct netlink_callback *cb;    /* callback state of the dump request */
};
1502
1503 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1504                          struct in6_addr *dst,
1505                          struct in6_addr *src,
1506                          int iif,
1507                          int type, u32 pid, u32 seq,
1508                          struct nlmsghdr *in_nlh, int prefix)
1509 {
1510         struct rtmsg *rtm;
1511         struct nlmsghdr  *nlh;
1512         unsigned char    *b = skb->tail;
1513         struct rta_cacheinfo ci;
1514
1515         if (prefix) {   /* user wants prefix routes only */
1516                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1517                         /* success since this is not a prefix route */
1518                         return 1;
1519                 }
1520         }
1521
1522         if (!pid && in_nlh) {
1523                 pid = in_nlh->nlmsg_pid;
1524         }
1525
1526         nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1527         rtm = NLMSG_DATA(nlh);
1528         rtm->rtm_family = AF_INET6;
1529         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1530         rtm->rtm_src_len = rt->rt6i_src.plen;
1531         rtm->rtm_tos = 0;
1532         rtm->rtm_table = RT_TABLE_MAIN;
1533         if (rt->rt6i_flags&RTF_REJECT)
1534                 rtm->rtm_type = RTN_UNREACHABLE;
1535         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1536                 rtm->rtm_type = RTN_LOCAL;
1537         else
1538                 rtm->rtm_type = RTN_UNICAST;
1539         rtm->rtm_flags = 0;
1540         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1541         rtm->rtm_protocol = rt->rt6i_protocol;
1542         if (rt->rt6i_flags&RTF_DYNAMIC)
1543                 rtm->rtm_protocol = RTPROT_REDIRECT;
1544         else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1545                 rtm->rtm_protocol = RTPROT_KERNEL;
1546         else if (rt->rt6i_flags&RTF_DEFAULT)
1547                 rtm->rtm_protocol = RTPROT_RA;
1548
1549         if (rt->rt6i_flags&RTF_CACHE)
1550                 rtm->rtm_flags |= RTM_F_CLONED;
1551
1552         if (dst) {
1553                 RTA_PUT(skb, RTA_DST, 16, dst);
1554                 rtm->rtm_dst_len = 128;
1555         } else if (rtm->rtm_dst_len)
1556                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1557 #ifdef CONFIG_IPV6_SUBTREES
1558         if (src) {
1559                 RTA_PUT(skb, RTA_SRC, 16, src);
1560                 rtm->rtm_src_len = 128;
1561         } else if (rtm->rtm_src_len)
1562                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1563 #endif
1564         if (iif)
1565                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1566         else if (dst) {
1567                 struct in6_addr saddr_buf;
1568                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1569                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1570         }
1571         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1572                 goto rtattr_failure;
1573         if (rt->u.dst.neighbour)
1574                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1575         if (rt->u.dst.dev)
1576                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1577         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1578         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1579         if (rt->rt6i_expires)
1580                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1581         else
1582                 ci.rta_expires = 0;
1583         ci.rta_used = rt->u.dst.__use;
1584         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1585         ci.rta_error = rt->u.dst.error;
1586         ci.rta_id = 0;
1587         ci.rta_ts = 0;
1588         ci.rta_tsage = 0;
1589         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1590         nlh->nlmsg_len = skb->tail - b;
1591         return skb->len;
1592
1593 nlmsg_failure:
1594 rtattr_failure:
1595         skb_trim(skb, b - skb->data);
1596         return -1;
1597 }
1598
1599 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1600 {
1601         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1602         int prefix;
1603
1604         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1605                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1606                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1607         } else
1608                 prefix = 0;
1609
1610         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1611                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1612                      NULL, prefix);
1613 }
1614
1615 static int fib6_dump_node(struct fib6_walker_t *w)
1616 {
1617         int res;
1618         struct rt6_info *rt;
1619
1620         for (rt = w->leaf; rt; rt = rt->u.next) {
1621                 res = rt6_dump_route(rt, w->args);
1622                 if (res < 0) {
1623                         /* Frame is full, suspend walking */
1624                         w->leaf = rt;
1625                         return 1;
1626                 }
1627                 BUG_TRAP(res!=0);
1628         }
1629         w->leaf = NULL;
1630         return 0;
1631 }
1632
1633 static void fib6_dump_end(struct netlink_callback *cb)
1634 {
1635         struct fib6_walker_t *w = (void*)cb->args[0];
1636
1637         if (w) {
1638                 cb->args[0] = 0;
1639                 fib6_walker_unlink(w);
1640                 kfree(w);
1641         }
1642         if (cb->args[1]) {
1643                 cb->done = (void*)cb->args[1];
1644                 cb->args[1] = 0;
1645         }
1646 }
1647
1648 static int fib6_dump_done(struct netlink_callback *cb)
1649 {
1650         fib6_dump_end(cb);
1651         return cb->done(cb);
1652 }
1653
1654 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1655 {
1656         struct rt6_rtnl_dump_arg arg;
1657         struct fib6_walker_t *w;
1658         int res;
1659
1660         arg.skb = skb;
1661         arg.cb = cb;
1662
1663         w = (void*)cb->args[0];
1664         if (w == NULL) {
1665                 /* New dump:
1666                  * 
1667                  * 1. hook callback destructor.
1668                  */
1669                 cb->args[1] = (long)cb->done;
1670                 cb->done = fib6_dump_done;
1671
1672                 /*
1673                  * 2. allocate and initialize walker.
1674                  */
1675                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1676                 if (w == NULL)
1677                         return -ENOMEM;
1678                 RT6_TRACE("dump<%p", w);
1679                 memset(w, 0, sizeof(*w));
1680                 w->root = &ip6_routing_table;
1681                 w->func = fib6_dump_node;
1682                 w->args = &arg;
1683                 cb->args[0] = (long)w;
1684                 read_lock_bh(&rt6_lock);
1685                 res = fib6_walk(w);
1686                 read_unlock_bh(&rt6_lock);
1687         } else {
1688                 w->args = &arg;
1689                 read_lock_bh(&rt6_lock);
1690                 res = fib6_walk_continue(w);
1691                 read_unlock_bh(&rt6_lock);
1692         }
1693 #if RT6_DEBUG >= 3
1694         if (res <= 0 && skb->len == 0)
1695                 RT6_TRACE("%p>dump end\n", w);
1696 #endif
1697         res = res < 0 ? res : skb->len;
1698         /* res < 0 is an error. (really, impossible)
1699            res == 0 means that dump is complete, but skb still can contain data.
1700            res > 0 dump is not complete, but frame is full.
1701          */
1702         /* Destroy walker, if dump of this table is complete. */
1703         if (res <= 0)
1704                 fib6_dump_end(cb);
1705         return res;
1706 }
1707
1708 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1709 {
1710         struct rtattr **rta = arg;
1711         int iif = 0;
1712         int err = -ENOBUFS;
1713         struct sk_buff *skb;
1714         struct flowi fl;
1715         struct rt6_info *rt;
1716
1717         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1718         if (skb == NULL)
1719                 goto out;
1720
1721         /* Reserve room for dummy headers, this skb can pass
1722            through good chunk of routing engine.
1723          */
1724         skb->mac.raw = skb->data;
1725         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1726
1727         memset(&fl, 0, sizeof(fl));
1728         if (rta[RTA_SRC-1])
1729                 ipv6_addr_copy(&fl.fl6_src,
1730                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1731         if (rta[RTA_DST-1])
1732                 ipv6_addr_copy(&fl.fl6_dst,
1733                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1734
1735         if (rta[RTA_IIF-1])
1736                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1737
1738         if (iif) {
1739                 struct net_device *dev;
1740                 dev = __dev_get_by_index(iif);
1741                 if (!dev) {
1742                         err = -ENODEV;
1743                         goto out_free;
1744                 }
1745         }
1746
1747         fl.oif = 0;
1748         if (rta[RTA_OIF-1])
1749                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1750
1751         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1752
1753         skb->dst = &rt->u.dst;
1754
1755         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1756         err = rt6_fill_node(skb, rt, 
1757                             &fl.fl6_dst, &fl.fl6_src,
1758                             iif,
1759                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1760                             nlh->nlmsg_seq, nlh, 0);
1761         if (err < 0) {
1762                 err = -EMSGSIZE;
1763                 goto out_free;
1764         }
1765
1766         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1767         if (err > 0)
1768                 err = 0;
1769 out:
1770         return err;
1771 out_free:
1772         kfree_skb(skb);
1773         goto out;       
1774 }
1775
1776 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1777 {
1778         struct sk_buff *skb;
1779         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1780
1781         skb = alloc_skb(size, gfp_any());
1782         if (!skb) {
1783                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1784                 return;
1785         }
1786         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1787                 kfree_skb(skb);
1788                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1789                 return;
1790         }
1791         NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1792         netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1793 }
1794
1795 /*
1796  *      /proc
1797  */
1798
1799 #ifdef CONFIG_PROC_FS
1800
/*
 * Nominal length in bytes of one text record emitted by rt6_info_route():
 * three 32-digit hex addresses, two " %02x " prefix-length fields
 * (4 bytes each), and the trailing metric/refcnt/use/flags/device part
 * (40 + 5 + 1).  It is used to convert a byte offset into a record count.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State carried across rt6_info_route() calls while dumping the table. */
struct rt6_proc_arg {
        char *buffer;   /* output buffer the records are printed into */
        int offset;     /* byte offset the reader asked for */
        int length;     /* bytes requested; no new record starts at or past it */
        int skip;       /* records skipped so far to reach offset */
        int len;        /* bytes written into buffer so far */
};
1811
1812 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1813 {
1814         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1815         int i;
1816
1817         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1818                 arg->skip++;
1819                 return 0;
1820         }
1821
1822         if (arg->len >= arg->length)
1823                 return 0;
1824
1825         for (i=0; i<16; i++) {
1826                 sprintf(arg->buffer + arg->len, "%02x",
1827                         rt->rt6i_dst.addr.s6_addr[i]);
1828                 arg->len += 2;
1829         }
1830         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1831                             rt->rt6i_dst.plen);
1832
1833 #ifdef CONFIG_IPV6_SUBTREES
1834         for (i=0; i<16; i++) {
1835                 sprintf(arg->buffer + arg->len, "%02x",
1836                         rt->rt6i_src.addr.s6_addr[i]);
1837                 arg->len += 2;
1838         }
1839         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1840                             rt->rt6i_src.plen);
1841 #else
1842         sprintf(arg->buffer + arg->len,
1843                 "00000000000000000000000000000000 00 ");
1844         arg->len += 36;
1845 #endif
1846
1847         if (rt->rt6i_nexthop) {
1848                 for (i=0; i<16; i++) {
1849                         sprintf(arg->buffer + arg->len, "%02x",
1850                                 rt->rt6i_nexthop->primary_key[i]);
1851                         arg->len += 2;
1852                 }
1853         } else {
1854                 sprintf(arg->buffer + arg->len,
1855                         "00000000000000000000000000000000");
1856                 arg->len += 32;
1857         }
1858         arg->len += sprintf(arg->buffer + arg->len,
1859                             " %08x %08x %08x %08x %8s\n",
1860                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1861                             rt->u.dst.__use, rt->rt6i_flags, 
1862                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1863         return 0;
1864 }
1865
1866 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1867 {
1868         struct rt6_proc_arg arg;
1869         arg.buffer = buffer;
1870         arg.offset = offset;
1871         arg.length = length;
1872         arg.skip = 0;
1873         arg.len = 0;
1874
1875         read_lock_bh(&rt6_lock);
1876         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1877         read_unlock_bh(&rt6_lock);
1878
1879         *start = buffer;
1880         if (offset)
1881                 *start += offset % RT6_INFO_LEN;
1882
1883         arg.len -= offset % RT6_INFO_LEN;
1884
1885         if (arg.len > length)
1886                 arg.len = length;
1887         if (arg.len < 0)
1888                 arg.len = 0;
1889
1890         return arg.len;
1891 }
1892
1893 extern struct rt6_statistics rt6_stats;
1894
1895 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1896 {
1897         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1898                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1899                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1900                       rt6_stats.fib_rt_cache,
1901                       atomic_read(&ip6_dst_ops.entries),
1902                       rt6_stats.fib_discarded_routes);
1903
1904         return 0;
1905 }
1906
1907 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1908 {
1909         return single_open(file, rt6_stats_seq_show, NULL);
1910 }
1911
1912 static struct file_operations rt6_stats_seq_fops = {
1913         .owner   = THIS_MODULE,
1914         .open    = rt6_stats_seq_open,
1915         .read    = seq_read,
1916         .llseek  = seq_lseek,
1917         .release = single_release,
1918 };
1919 #endif  /* CONFIG_PROC_FS */
1920
1921 #ifdef CONFIG_SYSCTL
1922
1923 static int flush_delay;
1924
1925 static
1926 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1927                               void __user *buffer, size_t *lenp, loff_t *ppos)
1928 {
1929         if (write) {
1930                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1931                 if (flush_delay < 0)
1932                         flush_delay = 0;
1933                 fib6_run_gc((unsigned long)flush_delay);
1934                 return 0;
1935         } else
1936                 return -EINVAL;
1937 }
1938
1939 ctl_table ipv6_route_table[] = {
1940         {
1941                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
1942                 .procname       =       "flush",
1943                 .data           =       &flush_delay,
1944                 .maxlen         =       sizeof(int),
1945                 .mode           =       0644,
1946                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
1947         },
1948         {
1949                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
1950                 .procname       =       "gc_thresh",
1951                 .data           =       &ip6_dst_ops.gc_thresh,
1952                 .maxlen         =       sizeof(int),
1953                 .mode           =       0644,
1954                 .proc_handler   =       &proc_dointvec,
1955         },
1956         {
1957                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
1958                 .procname       =       "max_size",
1959                 .data           =       &ip6_rt_max_size,
1960                 .maxlen         =       sizeof(int),
1961                 .mode           =       0644,
1962                 .proc_handler   =       &proc_dointvec,
1963         },
1964         {
1965                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
1966                 .procname       =       "gc_min_interval",
1967                 .data           =       &ip6_rt_gc_min_interval,
1968                 .maxlen         =       sizeof(int),
1969                 .mode           =       0644,
1970                 .proc_handler   =       &proc_dointvec_jiffies,
1971                 .strategy       =       &sysctl_jiffies,
1972         },
1973         {
1974                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
1975                 .procname       =       "gc_timeout",
1976                 .data           =       &ip6_rt_gc_timeout,
1977                 .maxlen         =       sizeof(int),
1978                 .mode           =       0644,
1979                 .proc_handler   =       &proc_dointvec_jiffies,
1980                 .strategy       =       &sysctl_jiffies,
1981         },
1982         {
1983                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
1984                 .procname       =       "gc_interval",
1985                 .data           =       &ip6_rt_gc_interval,
1986                 .maxlen         =       sizeof(int),
1987                 .mode           =       0644,
1988                 .proc_handler   =       &proc_dointvec_jiffies,
1989                 .strategy       =       &sysctl_jiffies,
1990         },
1991         {
1992                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
1993                 .procname       =       "gc_elasticity",
1994                 .data           =       &ip6_rt_gc_elasticity,
1995                 .maxlen         =       sizeof(int),
1996                 .mode           =       0644,
1997                 .proc_handler   =       &proc_dointvec_jiffies,
1998                 .strategy       =       &sysctl_jiffies,
1999         },
2000         {
2001                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2002                 .procname       =       "mtu_expires",
2003                 .data           =       &ip6_rt_mtu_expires,
2004                 .maxlen         =       sizeof(int),
2005                 .mode           =       0644,
2006                 .proc_handler   =       &proc_dointvec_jiffies,
2007                 .strategy       =       &sysctl_jiffies,
2008         },
2009         {
2010                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2011                 .procname       =       "min_adv_mss",
2012                 .data           =       &ip6_rt_min_advmss,
2013                 .maxlen         =       sizeof(int),
2014                 .mode           =       0644,
2015                 .proc_handler   =       &proc_dointvec_jiffies,
2016                 .strategy       =       &sysctl_jiffies,
2017         },
2018         { .ctl_name = 0 }
2019 };
2020
2021 #endif
2022
2023 void __init ip6_route_init(void)
2024 {
2025         struct proc_dir_entry *p;
2026
2027         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2028                                                      sizeof(struct rt6_info),
2029                                                      0, SLAB_HWCACHE_ALIGN,
2030                                                      NULL, NULL);
2031         if (!ip6_dst_ops.kmem_cachep)
2032                 panic("cannot create ip6_dst_cache");
2033
2034         fib6_init();
2035 #ifdef  CONFIG_PROC_FS
2036         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2037         if (p)
2038                 p->owner = THIS_MODULE;
2039
2040         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2041 #endif
2042 #ifdef CONFIG_XFRM
2043         xfrm6_init();
2044 #endif
2045 }
2046
2047 void __exit ip6_route_cleanup(void)
2048 {
2049 #ifdef CONFIG_PROC_FS
2050         proc_net_remove("ipv6_route");
2051         proc_net_remove("rt6_stats");
2052 #endif
2053 #ifdef CONFIG_XFRM
2054         xfrm6_fini();
2055 #endif
2056         rt6_ifdown(NULL);
2057         fib6_gc_cleanup();
2058         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2059 }