Merge to Fedora kernel-2.6.7-1.492
[linux-2.6.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
40
41 #ifdef  CONFIG_PROC_FS
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #endif
45
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56
57 #include <asm/uaccess.h>
58
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62
/*
 * Debug verbosity for this file.  At level 3 and above RDBG() and
 * RT6_TRACE() become printk() calls; below that both expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
73
74
75 static int ip6_rt_max_size = 4096;
76 static int ip6_rt_gc_min_interval = HZ / 2;
77 static int ip6_rt_gc_timeout = 60*HZ;
78 int ip6_rt_gc_interval = 30*HZ;
79 static int ip6_rt_gc_elasticity = 9;
80 static int ip6_rt_mtu_expires = 10*60*HZ;
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void             ip6_dst_destroy(struct dst_entry *);
87 static void             ip6_dst_ifdown(struct dst_entry *, int how);
88 static int               ip6_dst_gc(void);
89
90 static int              ip6_pkt_discard(struct sk_buff *skb);
91 static int              ip6_pkt_discard_out(struct sk_buff **pskb);
92 static void             ip6_link_failure(struct sk_buff *skb);
93 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
94
95 static struct dst_ops ip6_dst_ops = {
96         .family                 =       AF_INET6,
97         .protocol               =       __constant_htons(ETH_P_IPV6),
98         .gc                     =       ip6_dst_gc,
99         .gc_thresh              =       1024,
100         .check                  =       ip6_dst_check,
101         .destroy                =       ip6_dst_destroy,
102         .ifdown                 =       ip6_dst_ifdown,
103         .negative_advice        =       ip6_negative_advice,
104         .link_failure           =       ip6_link_failure,
105         .update_pmtu            =       ip6_rt_update_pmtu,
106         .entry_size             =       sizeof(struct rt6_info),
107 };
108
109 struct rt6_info ip6_null_entry = {
110         .u = {
111                 .dst = {
112                         .__refcnt       = ATOMIC_INIT(1),
113                         .__use          = 1,
114                         .dev            = &loopback_dev,
115                         .obsolete       = -1,
116                         .error          = -ENETUNREACH,
117                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
118                         .input          = ip6_pkt_discard,
119                         .output         = ip6_pkt_discard_out,
120                         .ops            = &ip6_dst_ops,
121                         .path           = (struct dst_entry*)&ip6_null_entry,
122                 }
123         },
124         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
125         .rt6i_metric    = ~(u32) 0,
126         .rt6i_ref       = ATOMIC_INIT(1),
127 };
128
129 struct fib6_node ip6_routing_table = {
130         .leaf           = &ip6_null_entry,
131         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
132 };
133
134 /* Protects all the ip6 fib */
135
136 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
137
138
139 /* allocate dst with ip6_dst_ops */
140 static __inline__ struct rt6_info *ip6_dst_alloc(void)
141 {
142         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
143 }
144
145 static void ip6_dst_destroy(struct dst_entry *dst)
146 {
147         struct rt6_info *rt = (struct rt6_info *)dst;
148         struct inet6_dev *idev = rt->rt6i_idev;
149
150         if (idev != NULL) {
151                 rt->rt6i_idev = NULL;
152                 in6_dev_put(idev);
153         }       
154 }
155
/*
 * dst_ops->ifdown callback.  Handled exactly like destruction: the
 * inet6_dev reference is released.  The 'how' argument is not consulted.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, int how)
{
        ip6_dst_destroy(dst);
}
160
161 /*
162  *      Route lookup. Any rt6_lock is implied.
163  */
164
165 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
166                                                     int oif,
167                                                     int strict)
168 {
169         struct rt6_info *local = NULL;
170         struct rt6_info *sprt;
171
172         if (oif) {
173                 for (sprt = rt; sprt; sprt = sprt->u.next) {
174                         struct net_device *dev = sprt->rt6i_dev;
175                         if (dev->ifindex == oif)
176                                 return sprt;
177                         if (dev->flags&IFF_LOOPBACK)
178                                 local = sprt;
179                 }
180
181                 if (local)
182                         return local;
183
184                 if (strict)
185                         return &ip6_null_entry;
186         }
187         return rt;
188 }
189
190 /*
191  *      pointer to the last default router chosen. BH is disabled locally.
192  */
193 static struct rt6_info *rt6_dflt_pointer;
194 static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
195
196 /* Default Router Selection (RFC 2461 6.3.6) */
197 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
198 {
199         struct rt6_info *match = NULL;
200         struct rt6_info *sprt;
201         int mpri = 0;
202
203         for (sprt = rt; sprt; sprt = sprt->u.next) {
204                 struct neighbour *neigh;
205                 int m = 0;
206
207                 if (!oif ||
208                     (sprt->rt6i_dev &&
209                      sprt->rt6i_dev->ifindex == oif))
210                         m += 8;
211
212                 if (sprt == rt6_dflt_pointer)
213                         m += 4;
214
215                 if ((neigh = sprt->rt6i_nexthop) != NULL) {
216                         read_lock_bh(&neigh->lock);
217                         switch (neigh->nud_state) {
218                         case NUD_REACHABLE:
219                                 m += 3;
220                                 break;
221
222                         case NUD_STALE:
223                         case NUD_DELAY:
224                         case NUD_PROBE:
225                                 m += 2;
226                                 break;
227
228                         case NUD_NOARP:
229                         case NUD_PERMANENT:
230                                 m += 1;
231                                 break;
232
233                         case NUD_INCOMPLETE:
234                         default:
235                                 read_unlock_bh(&neigh->lock);
236                                 continue;
237                         }
238                         read_unlock_bh(&neigh->lock);
239                 } else {
240                         continue;
241                 }
242
243                 if (m > mpri || m >= 12) {
244                         match = sprt;
245                         mpri = m;
246                         if (m >= 12) {
247                                 /* we choose the last default router if it
248                                  * is in (probably) reachable state.
249                                  * If route changed, we should do pmtu
250                                  * discovery. --yoshfuji
251                                  */
252                                 break;
253                         }
254                 }
255         }
256
257         spin_lock(&rt6_dflt_lock);
258         if (!match) {
259                 /*
260                  *      No default routers are known to be reachable.
261                  *      SHOULD round robin
262                  */
263                 if (rt6_dflt_pointer) {
264                         for (sprt = rt6_dflt_pointer->u.next;
265                              sprt; sprt = sprt->u.next) {
266                                 if (sprt->u.dst.obsolete <= 0 &&
267                                     sprt->u.dst.error == 0) {
268                                         match = sprt;
269                                         break;
270                                 }
271                         }
272                         for (sprt = rt;
273                              !match && sprt;
274                              sprt = sprt->u.next) {
275                                 if (sprt->u.dst.obsolete <= 0 &&
276                                     sprt->u.dst.error == 0) {
277                                         match = sprt;
278                                         break;
279                                 }
280                                 if (sprt == rt6_dflt_pointer)
281                                         break;
282                         }
283                 }
284         }
285
286         if (match) {
287                 if (rt6_dflt_pointer != match)
288                         RT6_TRACE("changed default router: %p->%p\n",
289                                   rt6_dflt_pointer, match);
290                 rt6_dflt_pointer = match;
291         }
292         spin_unlock(&rt6_dflt_lock);
293
294         if (!match) {
295                 /*
296                  * Last Resort: if no default routers found, 
297                  * use addrconf default route.
298                  * We don't record this route.
299                  */
300                 for (sprt = ip6_routing_table.leaf;
301                      sprt; sprt = sprt->u.next) {
302                         if ((sprt->rt6i_flags & RTF_DEFAULT) &&
303                             (!oif ||
304                              (sprt->rt6i_dev &&
305                               sprt->rt6i_dev->ifindex == oif))) {
306                                 match = sprt;
307                                 break;
308                         }
309                 }
310                 if (!match) {
311                         /* no default route.  give up. */
312                         match = &ip6_null_entry;
313                 }
314         }
315
316         return match;
317 }
318
319 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
320                             int oif, int strict)
321 {
322         struct fib6_node *fn;
323         struct rt6_info *rt;
324
325         read_lock_bh(&rt6_lock);
326         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
327         rt = rt6_device_match(fn->leaf, oif, strict);
328         dst_hold(&rt->u.dst);
329         rt->u.dst.__use++;
330         read_unlock_bh(&rt6_lock);
331
332         rt->u.dst.lastuse = jiffies;
333         if (rt->u.dst.error == 0)
334                 return rt;
335         dst_release(&rt->u.dst);
336         return NULL;
337 }
338
339 /* rt6_ins is called with FREE rt6_lock.
340    It takes new route entry, the addition fails by any reason the
341    route is freed. In any case, if caller does not hold it, it may
342    be destroyed.
343  */
344
345 static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
346 {
347         int err;
348
349         write_lock_bh(&rt6_lock);
350         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
351         write_unlock_bh(&rt6_lock);
352
353         return err;
354 }
355
356 /* No rt6_lock! If COW failed, the function returns dead route entry
357    with dst->error set to errno value.
358  */
359
360 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
361                                 struct in6_addr *saddr)
362 {
363         int err;
364         struct rt6_info *rt;
365
366         /*
367          *      Clone the route.
368          */
369
370         rt = ip6_rt_copy(ort);
371
372         if (rt) {
373                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
374
375                 if (!(rt->rt6i_flags&RTF_GATEWAY))
376                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
377
378                 rt->rt6i_dst.plen = 128;
379                 rt->rt6i_flags |= RTF_CACHE;
380                 rt->u.dst.flags |= DST_HOST;
381
382 #ifdef CONFIG_IPV6_SUBTREES
383                 if (rt->rt6i_src.plen && saddr) {
384                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
385                         rt->rt6i_src.plen = 128;
386                 }
387 #endif
388
389                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
390
391                 dst_hold(&rt->u.dst);
392
393                 err = rt6_ins(rt, NULL, NULL);
394                 if (err == 0)
395                         return rt;
396
397                 rt->u.dst.error = err;
398
399                 return rt;
400         }
401         dst_hold(&ip6_null_entry.u.dst);
402         return &ip6_null_entry;
403 }
404
/*
 * BACKTRACK() - retry a strict lookup further up the tree.
 *
 * Relies on the invoking function providing the locals 'rt', 'fn' and
 * 'strict' and the labels 'restart' and 'out'.  When the device match
 * produced &ip6_null_entry and the lookup is strict, climb the parent
 * chain:
 *   - at a node flagged RTN_ROOT, take a reference on the null entry
 *     and jump to 'out';
 *   - at a node flagged RTN_RTINFO, jump to 'restart' to match again
 *     from that node.
 * If the chain ends without either, execution falls through.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is a single
 * statement and cannot swallow a following 'else'.
 */
#define BACKTRACK() \
do { \
        if (rt == &ip6_null_entry && strict) { \
                while ((fn = fn->parent) != NULL) { \
                        if (fn->fn_flags & RTN_ROOT) { \
                                dst_hold(&rt->u.dst); \
                                goto out; \
                        } \
                        if (fn->fn_flags & RTN_RTINFO) \
                                goto restart; \
                } \
        } \
} while (0)
416
417
418 void ip6_route_input(struct sk_buff *skb)
419 {
420         struct fib6_node *fn;
421         struct rt6_info *rt;
422         int strict;
423         int attempts = 3;
424
425         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
426
427 relookup:
428         read_lock_bh(&rt6_lock);
429
430         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
431                          &skb->nh.ipv6h->saddr);
432
433 restart:
434         rt = fn->leaf;
435
436         if ((rt->rt6i_flags & RTF_CACHE)) {
437                 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
438                 BACKTRACK();
439                 dst_hold(&rt->u.dst);
440                 goto out;
441         }
442
443         rt = rt6_device_match(rt, skb->dev->ifindex, 0);
444         BACKTRACK();
445
446         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
447                 read_unlock_bh(&rt6_lock);
448
449                 rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
450                              &skb->nh.ipv6h->saddr);
451                         
452                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
453                         goto out2;
454                 /* Race condition! In the gap, when rt6_lock was
455                    released someone could insert this route.  Relookup.
456                 */
457                 dst_release(&rt->u.dst);
458                 goto relookup;
459         }
460         dst_hold(&rt->u.dst);
461
462 out:
463         read_unlock_bh(&rt6_lock);
464 out2:
465         rt->u.dst.lastuse = jiffies;
466         rt->u.dst.__use++;
467         skb->dst = (struct dst_entry *) rt;
468 }
469
470 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
471 {
472         struct fib6_node *fn;
473         struct rt6_info *rt;
474         int strict;
475         int attempts = 3;
476
477         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
478
479 relookup:
480         read_lock_bh(&rt6_lock);
481
482         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
483
484 restart:
485         rt = fn->leaf;
486
487         if ((rt->rt6i_flags & RTF_CACHE)) {
488                 rt = rt6_device_match(rt, fl->oif, strict);
489                 BACKTRACK();
490                 dst_hold(&rt->u.dst);
491                 goto out;
492         }
493         if (rt->rt6i_flags & RTF_DEFAULT) {
494                 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
495                         rt = rt6_best_dflt(rt, fl->oif);
496         } else {
497                 rt = rt6_device_match(rt, fl->oif, strict);
498                 BACKTRACK();
499         }
500
501         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
502                 read_unlock_bh(&rt6_lock);
503
504                 rt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
505
506                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
507                         goto out2;
508
509                 /* Race condition! In the gap, when rt6_lock was
510                    released someone could insert this route.  Relookup.
511                 */
512                 dst_release(&rt->u.dst);
513                 goto relookup;
514         }
515         dst_hold(&rt->u.dst);
516
517 out:
518         read_unlock_bh(&rt6_lock);
519 out2:
520         rt->u.dst.lastuse = jiffies;
521         rt->u.dst.__use++;
522         return &rt->u.dst;
523 }
524
525
526 /*
527  *      Destination cache support functions
528  */
529
530 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
531 {
532         struct rt6_info *rt;
533
534         rt = (struct rt6_info *) dst;
535
536         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
537                 return dst;
538
539         dst_release(dst);
540         return NULL;
541 }
542
543 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
544 {
545         struct rt6_info *rt = (struct rt6_info *) dst;
546
547         if (rt) {
548                 if (rt->rt6i_flags & RTF_CACHE)
549                         ip6_del_rt(rt, NULL, NULL);
550                 else
551                         dst_release(dst);
552         }
553         return NULL;
554 }
555
556 static void ip6_link_failure(struct sk_buff *skb)
557 {
558         struct rt6_info *rt;
559
560         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
561
562         rt = (struct rt6_info *) skb->dst;
563         if (rt) {
564                 if (rt->rt6i_flags&RTF_CACHE) {
565                         dst_set_expires(&rt->u.dst, 0);
566                         rt->rt6i_flags |= RTF_EXPIRES;
567                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
568                         rt->rt6i_node->fn_sernum = -1;
569         }
570 }
571
572 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
573 {
574         struct rt6_info *rt6 = (struct rt6_info*)dst;
575
576         if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
577                 rt6->rt6i_flags |= RTF_MODIFIED;
578                 if (mtu < IPV6_MIN_MTU)
579                         mtu = IPV6_MIN_MTU;
580                 dst->metrics[RTAX_MTU-1] = mtu;
581         }
582 }
583
/* List of dst entries created by ndisc_dst_alloc().  Protected by rt6_lock. */
static struct dst_entry *ndisc_dst_gc_list;

/* Helpers defined further down but needed by ndisc_dst_alloc(). */
static int ipv6_get_mtu(struct net_device *dev);
static inline unsigned int ipv6_advmss(unsigned int mtu);
588
589 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
590                                   struct neighbour *neigh,
591                                   struct in6_addr *addr,
592                                   int (*output)(struct sk_buff **))
593 {
594         struct rt6_info *rt = ip6_dst_alloc();
595
596         if (unlikely(rt == NULL))
597                 goto out;
598
599         dev_hold(dev);
600         if (neigh)
601                 neigh_hold(neigh);
602         else
603                 neigh = ndisc_get_neigh(dev, addr);
604
605         rt->rt6i_dev      = dev;
606         rt->rt6i_idev     = in6_dev_get(dev);
607         rt->rt6i_nexthop  = neigh;
608         atomic_set(&rt->u.dst.__refcnt, 1);
609         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
610         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
611         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
612         rt->u.dst.output  = output;
613
614 #if 0   /* there's no chance to use these for ndisc */
615         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
616                                 ? DST_HOST 
617                                 : 0;
618         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
619         rt->rt6i_dst.plen = 128;
620 #endif
621
622         write_lock_bh(&rt6_lock);
623         rt->u.dst.next = ndisc_dst_gc_list;
624         ndisc_dst_gc_list = &rt->u.dst;
625         write_unlock_bh(&rt6_lock);
626
627         fib6_force_start_gc();
628
629 out:
630         return (struct dst_entry *)rt;
631 }
632
633 int ndisc_dst_gc(int *more)
634 {
635         struct dst_entry *dst, *next, **pprev;
636         int freed;
637
638         next = NULL;
639         pprev = &ndisc_dst_gc_list;
640         freed = 0;
641         while ((dst = *pprev) != NULL) {
642                 if (!atomic_read(&dst->__refcnt)) {
643                         *pprev = dst->next;
644                         dst_free(dst);
645                         freed++;
646                 } else {
647                         pprev = &dst->next;
648                         (*more)++;
649                 }
650         }
651
652         return freed;
653 }
654
655 static int ip6_dst_gc(void)
656 {
657         static unsigned expire = 30*HZ;
658         static unsigned long last_gc;
659         unsigned long now = jiffies;
660
661         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
662             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
663                 goto out;
664
665         expire++;
666         fib6_run_gc(expire);
667         last_gc = now;
668         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
669                 expire = ip6_rt_gc_timeout>>1;
670
671 out:
672         expire -= expire>>ip6_rt_gc_elasticity;
673         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
674 }
675
676 /* Clean host part of a prefix. Not necessary in radix tree,
677    but results in cleaner routing tables.
678
679    Remove it only when all the things will work!
680  */
681
682 static int ipv6_get_mtu(struct net_device *dev)
683 {
684         int mtu = IPV6_MIN_MTU;
685         struct inet6_dev *idev;
686
687         idev = in6_dev_get(dev);
688         if (idev) {
689                 mtu = idev->cnf.mtu6;
690                 in6_dev_put(idev);
691         }
692         return mtu;
693 }
694
695 static inline unsigned int ipv6_advmss(unsigned int mtu)
696 {
697         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
698
699         if (mtu < ip6_rt_min_advmss)
700                 mtu = ip6_rt_min_advmss;
701
702         /*
703          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
704          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
705          * IPV6_MAXPLEN is also valid and means: "any MSS, 
706          * rely only on pmtu discovery"
707          */
708         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
709                 mtu = IPV6_MAXPLEN;
710         return mtu;
711 }
712
713 static int ipv6_get_hoplimit(struct net_device *dev)
714 {
715         int hoplimit = ipv6_devconf.hop_limit;
716         struct inet6_dev *idev;
717
718         idev = in6_dev_get(dev);
719         if (idev) {
720                 hoplimit = idev->cnf.hop_limit;
721                 in6_dev_put(idev);
722         }
723         return hoplimit;
724 }
725
726 /*
727  *
728  */
729
730 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
731 {
732         int err;
733         struct rtmsg *r;
734         struct rtattr **rta;
735         struct rt6_info *rt;
736         struct net_device *dev = NULL;
737         int addr_type;
738
739         rta = (struct rtattr **) _rtattr;
740
741         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
742                 return -EINVAL;
743 #ifndef CONFIG_IPV6_SUBTREES
744         if (rtmsg->rtmsg_src_len)
745                 return -EINVAL;
746 #endif
747         if (rtmsg->rtmsg_ifindex) {
748                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
749                 if (!dev)
750                         return -ENODEV;
751         }
752
753         if (rtmsg->rtmsg_metric == 0)
754                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
755
756         rt = ip6_dst_alloc();
757
758         if (rt == NULL)
759                 return -ENOMEM;
760
761         rt->u.dst.obsolete = -1;
762         rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
763         if (nlh && (r = NLMSG_DATA(nlh))) {
764                 rt->rt6i_protocol = r->rtm_protocol;
765         } else {
766                 rt->rt6i_protocol = RTPROT_BOOT;
767         }
768
769         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
770
771         if (addr_type & IPV6_ADDR_MULTICAST)
772                 rt->u.dst.input = ip6_mc_input;
773         else
774                 rt->u.dst.input = ip6_forward;
775
776         rt->u.dst.output = ip6_output;
777
778         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
779                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
780         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
781         if (rt->rt6i_dst.plen == 128)
782                rt->u.dst.flags = DST_HOST;
783
784 #ifdef CONFIG_IPV6_SUBTREES
785         ipv6_addr_prefix(&rt->rt6i_src.addr, 
786                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
787         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
788 #endif
789
790         rt->rt6i_metric = rtmsg->rtmsg_metric;
791
792         /* We cannot add true routes via loopback here,
793            they would result in kernel looping; promote them to reject routes
794          */
795         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
796             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
797                 if (dev)
798                         dev_put(dev);
799                 dev = &loopback_dev;
800                 dev_hold(dev);
801                 rt->u.dst.output = ip6_pkt_discard_out;
802                 rt->u.dst.input = ip6_pkt_discard;
803                 rt->u.dst.error = -ENETUNREACH;
804                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
805                 goto install_route;
806         }
807
808         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
809                 struct in6_addr *gw_addr;
810                 int gwa_type;
811
812                 gw_addr = &rtmsg->rtmsg_gateway;
813                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
814                 gwa_type = ipv6_addr_type(gw_addr);
815
816                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
817                         struct rt6_info *grt;
818
819                         /* IPv6 strictly inhibits using not link-local
820                            addresses as nexthop address.
821                            Otherwise, router will not able to send redirects.
822                            It is very good, but in some (rare!) circumstances
823                            (SIT, PtP, NBMA NOARP links) it is handy to allow
824                            some exceptions. --ANK
825                          */
826                         err = -EINVAL;
827                         if (!(gwa_type&IPV6_ADDR_UNICAST))
828                                 goto out;
829
830                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
831
832                         err = -EHOSTUNREACH;
833                         if (grt == NULL)
834                                 goto out;
835                         if (dev) {
836                                 if (dev != grt->rt6i_dev) {
837                                         dst_release(&grt->u.dst);
838                                         goto out;
839                                 }
840                         } else {
841                                 dev = grt->rt6i_dev;
842                                 dev_hold(dev);
843                         }
844                         if (!(grt->rt6i_flags&RTF_GATEWAY))
845                                 err = 0;
846                         dst_release(&grt->u.dst);
847
848                         if (err)
849                                 goto out;
850                 }
851                 err = -EINVAL;
852                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
853                         goto out;
854         }
855
856         err = -ENODEV;
857         if (dev == NULL)
858                 goto out;
859
860         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
861                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
862                 if (IS_ERR(rt->rt6i_nexthop)) {
863                         err = PTR_ERR(rt->rt6i_nexthop);
864                         rt->rt6i_nexthop = NULL;
865                         goto out;
866                 }
867         }
868
869         rt->rt6i_flags = rtmsg->rtmsg_flags;
870
871 install_route:
872         if (rta && rta[RTA_METRICS-1]) {
873                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
874                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
875
876                 while (RTA_OK(attr, attrlen)) {
877                         unsigned flavor = attr->rta_type;
878                         if (flavor) {
879                                 if (flavor > RTAX_MAX) {
880                                         err = -EINVAL;
881                                         goto out;
882                                 }
883                                 rt->u.dst.metrics[flavor-1] =
884                                         *(u32 *)RTA_DATA(attr);
885                         }
886                         attr = RTA_NEXT(attr, attrlen);
887                 }
888         }
889
890         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
891                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
892                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
893                                 IPV6_DEFAULT_MCASTHOPS;
894                 else
895                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
896                                 ipv6_get_hoplimit(dev);
897         }
898
899         if (!rt->u.dst.metrics[RTAX_MTU-1])
900                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
901         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
902                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
903         rt->u.dst.dev = dev;
904         rt->rt6i_idev = in6_dev_get(dev);
905         return rt6_ins(rt, nlh, _rtattr);
906
907 out:
908         if (dev)
909                 dev_put(dev);
910         dst_free((struct dst_entry *) rt);
911         return err;
912 }
913
914 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
915 {
916         int err;
917
918         write_lock_bh(&rt6_lock);
919
920         spin_lock_bh(&rt6_dflt_lock);
921         rt6_dflt_pointer = NULL;
922         spin_unlock_bh(&rt6_dflt_lock);
923
924         dst_release(&rt->u.dst);
925
926         err = fib6_del(rt, nlh, _rtattr);
927         write_unlock_bh(&rt6_lock);
928
929         return err;
930 }
931
932 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
933 {
934         struct fib6_node *fn;
935         struct rt6_info *rt;
936         int err = -ESRCH;
937
938         read_lock_bh(&rt6_lock);
939
940         fn = fib6_locate(&ip6_routing_table,
941                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
942                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
943         
944         if (fn) {
945                 for (rt = fn->leaf; rt; rt = rt->u.next) {
946                         if (rtmsg->rtmsg_ifindex &&
947                             (rt->rt6i_dev == NULL ||
948                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
949                                 continue;
950                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
951                             ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
952                                 continue;
953                         if (rtmsg->rtmsg_metric &&
954                             rtmsg->rtmsg_metric != rt->rt6i_metric)
955                                 continue;
956                         dst_hold(&rt->u.dst);
957                         read_unlock_bh(&rt6_lock);
958
959                         return ip6_del_rt(rt, nlh, _rtattr);
960                 }
961         }
962         read_unlock_bh(&rt6_lock);
963
964         return err;
965 }
966
967 /*
968  *      Handle redirects
969  */
970 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
971                   struct neighbour *neigh, int on_link)
972 {
973         struct rt6_info *rt, *nrt;
974
975         /* Locate old route to this destination. */
976         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
977
978         if (rt == NULL)
979                 return;
980
981         if (neigh->dev != rt->rt6i_dev)
982                 goto out;
983
984         /* Redirect received -> path was valid.
985            Look, redirects are sent only in response to data packets,
986            so that this nexthop apparently is reachable. --ANK
987          */
988         dst_confirm(&rt->u.dst);
989
990         /* Duplicate redirect: silently ignore. */
991         if (neigh == rt->u.dst.neighbour)
992                 goto out;
993
994         /* Current route is on-link; redirect is always invalid.
995            
996            Seems, previous statement is not true. It could
997            be node, which looks for us as on-link (f.e. proxy ndisc)
998            But then router serving it might decide, that we should
999            know truth 8)8) --ANK (980726).
1000          */
1001         if (!(rt->rt6i_flags&RTF_GATEWAY))
1002                 goto out;
1003
1004         /*
1005          *      RFC 2461 specifies that redirects should only be
1006          *      accepted if they come from the nexthop to the target.
1007          *      Due to the way default routers are chosen, this notion
1008          *      is a bit fuzzy and one might need to check all default
1009          *      routers.
1010          */
1011
1012         if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
1013                 if (rt->rt6i_flags & RTF_DEFAULT) {
1014                         struct rt6_info *rt1;
1015
1016                         read_lock(&rt6_lock);
1017                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1018                                 if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
1019                                         dst_hold(&rt1->u.dst);
1020                                         dst_release(&rt->u.dst);
1021                                         read_unlock(&rt6_lock);
1022                                         rt = rt1;
1023                                         goto source_ok;
1024                                 }
1025                         }
1026                         read_unlock(&rt6_lock);
1027                 }
1028                 if (net_ratelimit())
1029                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1030                                "for redirect target\n");
1031                 goto out;
1032         }
1033
1034 source_ok:
1035
1036         /*
1037          *      We have finally decided to accept it.
1038          */
1039
1040         nrt = ip6_rt_copy(rt);
1041         if (nrt == NULL)
1042                 goto out;
1043
1044         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1045         if (on_link)
1046                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1047
1048         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1049         nrt->rt6i_dst.plen = 128;
1050         nrt->u.dst.flags |= DST_HOST;
1051
1052         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1053         nrt->rt6i_nexthop = neigh_clone(neigh);
1054         /* Reset pmtu, it may be better */
1055         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1056         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
1057
1058         if (rt6_ins(nrt, NULL, NULL))
1059                 goto out;
1060
1061         if (rt->rt6i_flags&RTF_CACHE) {
1062                 ip6_del_rt(rt, NULL, NULL);
1063                 return;
1064         }
1065
1066 out:
1067         dst_release(&rt->u.dst);
1068         return;
1069 }
1070
1071 /*
1072  *      Handle ICMP "packet too big" messages
1073  *      i.e. Path MTU discovery
1074  */
1075
1076 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1077                         struct net_device *dev, u32 pmtu)
1078 {
1079         struct rt6_info *rt, *nrt;
1080
1081         if (pmtu < IPV6_MIN_MTU) {
1082                 if (net_ratelimit())
1083                         printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1084                                pmtu);
1085                 /* According to RFC1981, the PMTU is set to the IPv6 minimum
1086                    link MTU if the node receives a Packet Too Big message
1087                    reporting next-hop MTU that is less than the IPv6 minimum MTU.
1088                    */
1089                 pmtu = IPV6_MIN_MTU;
1090         }
1091
1092         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1093
1094         if (rt == NULL)
1095                 return;
1096
1097         if (pmtu >= dst_pmtu(&rt->u.dst))
1098                 goto out;
1099
1100         /* New mtu received -> path was valid.
1101            They are sent only in response to data packets,
1102            so that this nexthop apparently is reachable. --ANK
1103          */
1104         dst_confirm(&rt->u.dst);
1105
1106         /* Host route. If it is static, it would be better
1107            not to override it, but add new one, so that
1108            when cache entry will expire old pmtu
1109            would return automatically.
1110          */
1111         if (rt->rt6i_flags & RTF_CACHE) {
1112                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1113                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1114                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1115                 goto out;
1116         }
1117
1118         /* Network route.
1119            Two cases are possible:
1120            1. It is connected route. Action: COW
1121            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1122          */
1123         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1124                 nrt = rt6_cow(rt, daddr, saddr);
1125                 if (!nrt->u.dst.error) {
1126                         nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1127                         /* According to RFC 1981, detecting PMTU increase shouldn't be
1128                            happened within 5 mins, the recommended timer is 10 mins.
1129                            Here this route expiration time is set to ip6_rt_mtu_expires
1130                            which is 10 mins. After 10 mins the decreased pmtu is expired
1131                            and detecting PMTU increase will be automatically happened.
1132                          */
1133                         dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1134                         nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1135                 }
1136                 dst_release(&nrt->u.dst);
1137         } else {
1138                 nrt = ip6_rt_copy(rt);
1139                 if (nrt == NULL)
1140                         goto out;
1141                 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1142                 nrt->rt6i_dst.plen = 128;
1143                 nrt->u.dst.flags |= DST_HOST;
1144                 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1145                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1146                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1147                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1148                 rt6_ins(nrt, NULL, NULL);
1149         }
1150
1151 out:
1152         dst_release(&rt->u.dst);
1153 }
1154
1155 /*
1156  *      Misc support functions
1157  */
1158
1159 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1160 {
1161         struct rt6_info *rt = ip6_dst_alloc();
1162
1163         if (rt) {
1164                 rt->u.dst.input = ort->u.dst.input;
1165                 rt->u.dst.output = ort->u.dst.output;
1166
1167                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1168                 rt->u.dst.dev = ort->u.dst.dev;
1169                 if (rt->u.dst.dev)
1170                         dev_hold(rt->u.dst.dev);
1171                 rt->rt6i_idev = ort->rt6i_idev;
1172                 if (rt->rt6i_idev)
1173                         in6_dev_hold(rt->rt6i_idev);
1174                 rt->u.dst.lastuse = jiffies;
1175                 rt->rt6i_expires = 0;
1176
1177                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1178                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1179                 rt->rt6i_metric = 0;
1180
1181                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1182 #ifdef CONFIG_IPV6_SUBTREES
1183                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1184 #endif
1185         }
1186         return rt;
1187 }
1188
1189 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1190 {       
1191         struct rt6_info *rt;
1192         struct fib6_node *fn;
1193
1194         fn = &ip6_routing_table;
1195
1196         write_lock_bh(&rt6_lock);
1197         for (rt = fn->leaf; rt; rt=rt->u.next) {
1198                 if (dev == rt->rt6i_dev &&
1199                     ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1200                         break;
1201         }
1202         if (rt)
1203                 dst_hold(&rt->u.dst);
1204         write_unlock_bh(&rt6_lock);
1205         return rt;
1206 }
1207
1208 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1209                                      struct net_device *dev)
1210 {
1211         struct in6_rtmsg rtmsg;
1212
1213         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1214         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1215         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1216         rtmsg.rtmsg_metric = 1024;
1217         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1218
1219         rtmsg.rtmsg_ifindex = dev->ifindex;
1220
1221         ip6_route_add(&rtmsg, NULL, NULL);
1222         return rt6_get_dflt_router(gwaddr, dev);
1223 }
1224
1225 void rt6_purge_dflt_routers(int last_resort)
1226 {
1227         struct rt6_info *rt;
1228         u32 flags;
1229
1230         if (last_resort)
1231                 flags = RTF_ALLONLINK;
1232         else
1233                 flags = RTF_DEFAULT | RTF_ADDRCONF;     
1234
1235 restart:
1236         read_lock_bh(&rt6_lock);
1237         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1238                 if (rt->rt6i_flags & flags) {
1239                         dst_hold(&rt->u.dst);
1240
1241                         spin_lock_bh(&rt6_dflt_lock);
1242                         rt6_dflt_pointer = NULL;
1243                         spin_unlock_bh(&rt6_dflt_lock);
1244
1245                         read_unlock_bh(&rt6_lock);
1246
1247                         ip6_del_rt(rt, NULL, NULL);
1248
1249                         goto restart;
1250                 }
1251         }
1252         read_unlock_bh(&rt6_lock);
1253 }
1254
1255 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1256 {
1257         struct in6_rtmsg rtmsg;
1258         int err;
1259
1260         switch(cmd) {
1261         case SIOCADDRT:         /* Add a route */
1262         case SIOCDELRT:         /* Delete a route */
1263                 if (!capable(CAP_NET_ADMIN))
1264                         return -EPERM;
1265                 err = copy_from_user(&rtmsg, arg,
1266                                      sizeof(struct in6_rtmsg));
1267                 if (err)
1268                         return -EFAULT;
1269                         
1270                 rtnl_lock();
1271                 switch (cmd) {
1272                 case SIOCADDRT:
1273                         err = ip6_route_add(&rtmsg, NULL, NULL);
1274                         break;
1275                 case SIOCDELRT:
1276                         err = ip6_route_del(&rtmsg, NULL, NULL);
1277                         break;
1278                 default:
1279                         err = -EINVAL;
1280                 }
1281                 rtnl_unlock();
1282
1283                 return err;
1284         };
1285
1286         return -EINVAL;
1287 }
1288
1289 /*
1290  *      Drop the packet on the floor
1291  */
1292
1293 int ip6_pkt_discard(struct sk_buff *skb)
1294 {
1295         IP6_INC_STATS(OutNoRoutes);
1296         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1297         kfree_skb(skb);
1298         return 0;
1299 }
1300
/*
 *	Variant of ip6_pkt_discard() that takes a pointer to the skb
 *	pointer; it discards *@pskb and returns ip6_pkt_discard()'s result.
 */
int ip6_pkt_discard_out(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;

	return ip6_pkt_discard(skb);
}
1305
1306 /*
1307  *      Add address
1308  */
1309
1310 int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast)
1311 {
1312         struct rt6_info *rt = ip6_dst_alloc();
1313
1314         if (rt == NULL)
1315                 return -ENOMEM;
1316
1317         dev_hold(&loopback_dev);
1318
1319         rt->u.dst.flags = DST_HOST;
1320         rt->u.dst.input = ip6_input;
1321         rt->u.dst.output = ip6_output;
1322         rt->rt6i_dev = &loopback_dev;
1323         rt->rt6i_idev = in6_dev_get(&loopback_dev);
1324         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1325         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
1326         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
1327         rt->u.dst.obsolete = -1;
1328
1329         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1330         if (!anycast)
1331                 rt->rt6i_flags |= RTF_LOCAL;
1332         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1333         if (rt->rt6i_nexthop == NULL) {
1334                 dst_free((struct dst_entry *) rt);
1335                 return -ENOMEM;
1336         }
1337
1338         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1339         rt->rt6i_dst.plen = 128;
1340         rt6_ins(rt, NULL, NULL);
1341
1342         return 0;
1343 }
1344
1345 /* Delete address. Warning: you should check that this address
1346    disappeared before calling this function.
1347  */
1348
1349 int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
1350 {
1351         struct rt6_info *rt;
1352         int err = -ENOENT;
1353
1354         rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
1355         if (rt) {
1356                 if (rt->rt6i_dst.plen == 128)
1357                         err = ip6_del_rt(rt, NULL, NULL);
1358                 else
1359                         dst_release(&rt->u.dst);
1360         }
1361
1362         return err;
1363 }
1364
1365 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1366 {
1367         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1368             rt != &ip6_null_entry) {
1369                 RT6_TRACE("deleted by ifdown %p\n", rt);
1370                 return -1;
1371         }
1372         return 0;
1373 }
1374
1375 void rt6_ifdown(struct net_device *dev)
1376 {
1377         write_lock_bh(&rt6_lock);
1378         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1379         write_unlock_bh(&rt6_lock);
1380 }
1381
/* Argument bundle that rt6_mtu_change() passes to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
1387
1388 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1389 {
1390         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1391         struct inet6_dev *idev;
1392
1393         /* In IPv6 pmtu discovery is not optional,
1394            so that RTAX_MTU lock cannot disable it.
1395            We still use this lock to block changes
1396            caused by addrconf/ndisc.
1397         */
1398
1399         idev = __in6_dev_get(arg->dev);
1400         if (idev == NULL)
1401                 return 0;
1402
1403         /* For administrative MTU increase, there is no way to discover
1404            IPv6 PMTU increase, so PMTU increase should be updated here.
1405            Since RFC 1981 doesn't include administrative MTU increase
1406            update PMTU increase is a MUST. (i.e. jumbo frame)
1407          */
1408         /*
1409            If new MTU is less than route PMTU, this new MTU will be the
1410            lowest MTU in the path, update the route PMTU to reflect PMTU
1411            decreases; if new MTU is greater than route PMTU, and the
1412            old MTU is the lowest MTU in the path, update the route PMTU
1413            to reflect the increase. In this case if the other nodes' MTU
1414            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1415            PMTU discouvery.
1416          */
1417         if (rt->rt6i_dev == arg->dev &&
1418             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1419             (dst_pmtu(&rt->u.dst) > arg->mtu ||
1420              (dst_pmtu(&rt->u.dst) < arg->mtu &&
1421               dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
1422                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1423         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1424         return 0;
1425 }
1426
1427 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1428 {
1429         struct rt6_mtu_change_arg arg;
1430
1431         arg.dev = dev;
1432         arg.mtu = mtu;
1433         read_lock_bh(&rt6_lock);
1434         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1435         read_unlock_bh(&rt6_lock);
1436 }
1437
1438 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1439                               struct in6_rtmsg *rtmsg)
1440 {
1441         memset(rtmsg, 0, sizeof(*rtmsg));
1442
1443         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1444         rtmsg->rtmsg_src_len = r->rtm_src_len;
1445         rtmsg->rtmsg_flags = RTF_UP;
1446         if (r->rtm_type == RTN_UNREACHABLE)
1447                 rtmsg->rtmsg_flags |= RTF_REJECT;
1448
1449         if (rta[RTA_GATEWAY-1]) {
1450                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1451                         return -EINVAL;
1452                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1453                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1454         }
1455         if (rta[RTA_DST-1]) {
1456                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1457                         return -EINVAL;
1458                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1459         }
1460         if (rta[RTA_SRC-1]) {
1461                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1462                         return -EINVAL;
1463                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1464         }
1465         if (rta[RTA_OIF-1]) {
1466                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1467                         return -EINVAL;
1468                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1469         }
1470         if (rta[RTA_PRIORITY-1]) {
1471                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1472                         return -EINVAL;
1473                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1474         }
1475         return 0;
1476 }
1477
1478 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1479 {
1480         struct rtmsg *r = NLMSG_DATA(nlh);
1481         struct in6_rtmsg rtmsg;
1482
1483         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1484                 return -EINVAL;
1485         return ip6_route_del(&rtmsg, nlh, arg);
1486 }
1487
1488 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1489 {
1490         struct rtmsg *r = NLMSG_DATA(nlh);
1491         struct in6_rtmsg rtmsg;
1492
1493         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1494                 return -EINVAL;
1495         return ip6_route_add(&rtmsg, nlh, arg);
1496 }
1497
/* Per-call state handed to the dump walker (see inet6_dump_fib()). */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer the route records are written to */
	struct netlink_callback *cb;	/* netlink dump callback state */
};
1503
1504 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1505                          struct in6_addr *dst,
1506                          struct in6_addr *src,
1507                          int iif,
1508                          int type, u32 pid, u32 seq,
1509                          struct nlmsghdr *in_nlh, int prefix)
1510 {
1511         struct rtmsg *rtm;
1512         struct nlmsghdr  *nlh;
1513         unsigned char    *b = skb->tail;
1514         struct rta_cacheinfo ci;
1515
1516         if (prefix) {   /* user wants prefix routes only */
1517                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1518                         /* success since this is not a prefix route */
1519                         return 1;
1520                 }
1521         }
1522
1523         if (!pid && in_nlh) {
1524                 pid = in_nlh->nlmsg_pid;
1525         }
1526
1527         nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1528         rtm = NLMSG_DATA(nlh);
1529         rtm->rtm_family = AF_INET6;
1530         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1531         rtm->rtm_src_len = rt->rt6i_src.plen;
1532         rtm->rtm_tos = 0;
1533         rtm->rtm_table = RT_TABLE_MAIN;
1534         if (rt->rt6i_flags&RTF_REJECT)
1535                 rtm->rtm_type = RTN_UNREACHABLE;
1536         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1537                 rtm->rtm_type = RTN_LOCAL;
1538         else
1539                 rtm->rtm_type = RTN_UNICAST;
1540         rtm->rtm_flags = 0;
1541         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1542         rtm->rtm_protocol = rt->rt6i_protocol;
1543         if (rt->rt6i_flags&RTF_DYNAMIC)
1544                 rtm->rtm_protocol = RTPROT_REDIRECT;
1545         else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1546                 rtm->rtm_protocol = RTPROT_KERNEL;
1547         else if (rt->rt6i_flags&RTF_DEFAULT)
1548                 rtm->rtm_protocol = RTPROT_RA;
1549
1550         if (rt->rt6i_flags&RTF_CACHE)
1551                 rtm->rtm_flags |= RTM_F_CLONED;
1552
1553         if (dst) {
1554                 RTA_PUT(skb, RTA_DST, 16, dst);
1555                 rtm->rtm_dst_len = 128;
1556         } else if (rtm->rtm_dst_len)
1557                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1558 #ifdef CONFIG_IPV6_SUBTREES
1559         if (src) {
1560                 RTA_PUT(skb, RTA_SRC, 16, src);
1561                 rtm->rtm_src_len = 128;
1562         } else if (rtm->rtm_src_len)
1563                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1564 #endif
1565         if (iif)
1566                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1567         else if (dst) {
1568                 struct in6_addr saddr_buf;
1569                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1570                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1571         }
1572         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1573                 goto rtattr_failure;
1574         if (rt->u.dst.neighbour)
1575                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1576         if (rt->u.dst.dev)
1577                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1578         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1579         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1580         if (rt->rt6i_expires)
1581                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1582         else
1583                 ci.rta_expires = 0;
1584         ci.rta_used = rt->u.dst.__use;
1585         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1586         ci.rta_error = rt->u.dst.error;
1587         ci.rta_id = 0;
1588         ci.rta_ts = 0;
1589         ci.rta_tsage = 0;
1590         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1591         nlh->nlmsg_len = skb->tail - b;
1592         return skb->len;
1593
1594 nlmsg_failure:
1595 rtattr_failure:
1596         skb_trim(skb, b - skb->data);
1597         return -1;
1598 }
1599
1600 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1601 {
1602         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1603         int prefix;
1604
1605         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1606                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1607                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1608         } else
1609                 prefix = 0;
1610
1611         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1612                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1613                      NULL, prefix);
1614 }
1615
1616 static int fib6_dump_node(struct fib6_walker_t *w)
1617 {
1618         int res;
1619         struct rt6_info *rt;
1620
1621         for (rt = w->leaf; rt; rt = rt->u.next) {
1622                 res = rt6_dump_route(rt, w->args);
1623                 if (res < 0) {
1624                         /* Frame is full, suspend walking */
1625                         w->leaf = rt;
1626                         return 1;
1627                 }
1628                 BUG_TRAP(res!=0);
1629         }
1630         w->leaf = NULL;
1631         return 0;
1632 }
1633
1634 static void fib6_dump_end(struct netlink_callback *cb)
1635 {
1636         struct fib6_walker_t *w = (void*)cb->args[0];
1637
1638         if (w) {
1639                 cb->args[0] = 0;
1640                 fib6_walker_unlink(w);
1641                 kfree(w);
1642         }
1643         if (cb->args[1]) {
1644                 cb->done = (void*)cb->args[1];
1645                 cb->args[1] = 0;
1646         }
1647 }
1648
1649 static int fib6_dump_done(struct netlink_callback *cb)
1650 {
1651         fib6_dump_end(cb);
1652         return cb->done(cb);
1653 }
1654
1655 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1656 {
1657         struct rt6_rtnl_dump_arg arg;
1658         struct fib6_walker_t *w;
1659         int res;
1660
1661         arg.skb = skb;
1662         arg.cb = cb;
1663
1664         w = (void*)cb->args[0];
1665         if (w == NULL) {
1666                 /* New dump:
1667                  * 
1668                  * 1. hook callback destructor.
1669                  */
1670                 cb->args[1] = (long)cb->done;
1671                 cb->done = fib6_dump_done;
1672
1673                 /*
1674                  * 2. allocate and initialize walker.
1675                  */
1676                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1677                 if (w == NULL)
1678                         return -ENOMEM;
1679                 RT6_TRACE("dump<%p", w);
1680                 memset(w, 0, sizeof(*w));
1681                 w->root = &ip6_routing_table;
1682                 w->func = fib6_dump_node;
1683                 w->args = &arg;
1684                 cb->args[0] = (long)w;
1685                 read_lock_bh(&rt6_lock);
1686                 res = fib6_walk(w);
1687                 read_unlock_bh(&rt6_lock);
1688         } else {
1689                 w->args = &arg;
1690                 read_lock_bh(&rt6_lock);
1691                 res = fib6_walk_continue(w);
1692                 read_unlock_bh(&rt6_lock);
1693         }
1694 #if RT6_DEBUG >= 3
1695         if (res <= 0 && skb->len == 0)
1696                 RT6_TRACE("%p>dump end\n", w);
1697 #endif
1698         res = res < 0 ? res : skb->len;
1699         /* res < 0 is an error. (really, impossible)
1700            res == 0 means that dump is complete, but skb still can contain data.
1701            res > 0 dump is not complete, but frame is full.
1702          */
1703         /* Destroy walker, if dump of this table is complete. */
1704         if (res <= 0)
1705                 fib6_dump_end(cb);
1706         return res;
1707 }
1708
1709 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1710 {
1711         struct rtattr **rta = arg;
1712         int iif = 0;
1713         int err = -ENOBUFS;
1714         struct sk_buff *skb;
1715         struct flowi fl;
1716         struct rt6_info *rt;
1717
1718         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1719         if (skb == NULL)
1720                 goto out;
1721
1722         /* Reserve room for dummy headers, this skb can pass
1723            through good chunk of routing engine.
1724          */
1725         skb->mac.raw = skb->data;
1726         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1727
1728         memset(&fl, 0, sizeof(fl));
1729         if (rta[RTA_SRC-1])
1730                 ipv6_addr_copy(&fl.fl6_src,
1731                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1732         if (rta[RTA_DST-1])
1733                 ipv6_addr_copy(&fl.fl6_dst,
1734                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1735
1736         if (rta[RTA_IIF-1])
1737                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1738
1739         if (iif) {
1740                 struct net_device *dev;
1741                 dev = __dev_get_by_index(iif);
1742                 if (!dev) {
1743                         err = -ENODEV;
1744                         goto out_free;
1745                 }
1746         }
1747
1748         fl.oif = 0;
1749         if (rta[RTA_OIF-1])
1750                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1751
1752         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1753
1754         skb->dst = &rt->u.dst;
1755
1756         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1757         err = rt6_fill_node(skb, rt, 
1758                             &fl.fl6_dst, &fl.fl6_src,
1759                             iif,
1760                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1761                             nlh->nlmsg_seq, nlh, 0);
1762         if (err < 0) {
1763                 err = -EMSGSIZE;
1764                 goto out_free;
1765         }
1766
1767         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1768         if (err > 0)
1769                 err = 0;
1770 out:
1771         return err;
1772 out_free:
1773         kfree_skb(skb);
1774         goto out;       
1775 }
1776
1777 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1778 {
1779         struct sk_buff *skb;
1780         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1781
1782         skb = alloc_skb(size, gfp_any());
1783         if (!skb) {
1784                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1785                 return;
1786         }
1787         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1788                 kfree_skb(skb);
1789                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1790                 return;
1791         }
1792         NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1793         netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1794 }
1795
1796 /*
1797  *      /proc
1798  */
1799
1800 #ifdef CONFIG_PROC_FS
1801
/*
 * Nominal size in bytes (150) of one route record in the ipv6_route proc
 * output.  rt6_info_route() emits exactly this many characters per route
 * as long as every %08x value fits in 8 hex digits and the device name in
 * 8 characters; the read offset is split into records using this size.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and its per-route callback. */
struct rt6_proc_arg {
	char *buffer;	/* output buffer supplied by the reader */
	int offset;	/* read offset requested by the reader */
	int length;	/* number of bytes requested by the reader */
	int skip;	/* whole records skipped so far to reach offset */
	int len;	/* bytes written into buffer so far */
};
1812
1813 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1814 {
1815         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1816         int i;
1817
1818         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1819                 arg->skip++;
1820                 return 0;
1821         }
1822
1823         if (arg->len >= arg->length)
1824                 return 0;
1825
1826         for (i=0; i<16; i++) {
1827                 sprintf(arg->buffer + arg->len, "%02x",
1828                         rt->rt6i_dst.addr.s6_addr[i]);
1829                 arg->len += 2;
1830         }
1831         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1832                             rt->rt6i_dst.plen);
1833
1834 #ifdef CONFIG_IPV6_SUBTREES
1835         for (i=0; i<16; i++) {
1836                 sprintf(arg->buffer + arg->len, "%02x",
1837                         rt->rt6i_src.addr.s6_addr[i]);
1838                 arg->len += 2;
1839         }
1840         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1841                             rt->rt6i_src.plen);
1842 #else
1843         sprintf(arg->buffer + arg->len,
1844                 "00000000000000000000000000000000 00 ");
1845         arg->len += 36;
1846 #endif
1847
1848         if (rt->rt6i_nexthop) {
1849                 for (i=0; i<16; i++) {
1850                         sprintf(arg->buffer + arg->len, "%02x",
1851                                 rt->rt6i_nexthop->primary_key[i]);
1852                         arg->len += 2;
1853                 }
1854         } else {
1855                 sprintf(arg->buffer + arg->len,
1856                         "00000000000000000000000000000000");
1857                 arg->len += 32;
1858         }
1859         arg->len += sprintf(arg->buffer + arg->len,
1860                             " %08x %08x %08x %08x %8s\n",
1861                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1862                             rt->u.dst.__use, rt->rt6i_flags, 
1863                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1864         return 0;
1865 }
1866
1867 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1868 {
1869         struct rt6_proc_arg arg;
1870         arg.buffer = buffer;
1871         arg.offset = offset;
1872         arg.length = length;
1873         arg.skip = 0;
1874         arg.len = 0;
1875
1876         read_lock_bh(&rt6_lock);
1877         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1878         read_unlock_bh(&rt6_lock);
1879
1880         *start = buffer;
1881         if (offset)
1882                 *start += offset % RT6_INFO_LEN;
1883
1884         arg.len -= offset % RT6_INFO_LEN;
1885
1886         if (arg.len > length)
1887                 arg.len = length;
1888         if (arg.len < 0)
1889                 arg.len = 0;
1890
1891         return arg.len;
1892 }
1893
1894 extern struct rt6_statistics rt6_stats;
1895
1896 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1897 {
1898         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1899                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1900                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1901                       rt6_stats.fib_rt_cache,
1902                       atomic_read(&ip6_dst_ops.entries),
1903                       rt6_stats.fib_discarded_routes);
1904
1905         return 0;
1906 }
1907
1908 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1909 {
1910         return single_open(file, rt6_stats_seq_show, NULL);
1911 }
1912
1913 static struct file_operations rt6_stats_seq_fops = {
1914         .owner   = THIS_MODULE,
1915         .open    = rt6_stats_seq_open,
1916         .read    = seq_read,
1917         .llseek  = seq_lseek,
1918         .release = single_release,
1919 };
1920 #endif  /* CONFIG_PROC_FS */
1921
1922 #ifdef CONFIG_SYSCTL
1923
1924 static int flush_delay;
1925
1926 static
1927 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1928                               void __user *buffer, size_t *lenp)
1929 {
1930         if (write) {
1931                 proc_dointvec(ctl, write, filp, buffer, lenp);
1932                 if (flush_delay < 0)
1933                         flush_delay = 0;
1934                 fib6_run_gc((unsigned long)flush_delay);
1935                 return 0;
1936         } else
1937                 return -EINVAL;
1938 }
1939
1940 ctl_table ipv6_route_table[] = {
1941         {
1942                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
1943                 .procname       =       "flush",
1944                 .data           =       &flush_delay,
1945                 .maxlen         =       sizeof(int),
1946                 .mode           =       0644,
1947                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
1948         },
1949         {
1950                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
1951                 .procname       =       "gc_thresh",
1952                 .data           =       &ip6_dst_ops.gc_thresh,
1953                 .maxlen         =       sizeof(int),
1954                 .mode           =       0644,
1955                 .proc_handler   =       &proc_dointvec,
1956         },
1957         {
1958                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
1959                 .procname       =       "max_size",
1960                 .data           =       &ip6_rt_max_size,
1961                 .maxlen         =       sizeof(int),
1962                 .mode           =       0644,
1963                 .proc_handler   =       &proc_dointvec,
1964         },
1965         {
1966                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
1967                 .procname       =       "gc_min_interval",
1968                 .data           =       &ip6_rt_gc_min_interval,
1969                 .maxlen         =       sizeof(int),
1970                 .mode           =       0644,
1971                 .proc_handler   =       &proc_dointvec_jiffies,
1972                 .strategy       =       &sysctl_jiffies,
1973         },
1974         {
1975                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
1976                 .procname       =       "gc_timeout",
1977                 .data           =       &ip6_rt_gc_timeout,
1978                 .maxlen         =       sizeof(int),
1979                 .mode           =       0644,
1980                 .proc_handler   =       &proc_dointvec_jiffies,
1981                 .strategy       =       &sysctl_jiffies,
1982         },
1983         {
1984                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
1985                 .procname       =       "gc_interval",
1986                 .data           =       &ip6_rt_gc_interval,
1987                 .maxlen         =       sizeof(int),
1988                 .mode           =       0644,
1989                 .proc_handler   =       &proc_dointvec_jiffies,
1990                 .strategy       =       &sysctl_jiffies,
1991         },
1992         {
1993                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
1994                 .procname       =       "gc_elasticity",
1995                 .data           =       &ip6_rt_gc_elasticity,
1996                 .maxlen         =       sizeof(int),
1997                 .mode           =       0644,
1998                 .proc_handler   =       &proc_dointvec_jiffies,
1999                 .strategy       =       &sysctl_jiffies,
2000         },
2001         {
2002                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2003                 .procname       =       "mtu_expires",
2004                 .data           =       &ip6_rt_mtu_expires,
2005                 .maxlen         =       sizeof(int),
2006                 .mode           =       0644,
2007                 .proc_handler   =       &proc_dointvec_jiffies,
2008                 .strategy       =       &sysctl_jiffies,
2009         },
2010         {
2011                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2012                 .procname       =       "min_adv_mss",
2013                 .data           =       &ip6_rt_min_advmss,
2014                 .maxlen         =       sizeof(int),
2015                 .mode           =       0644,
2016                 .proc_handler   =       &proc_dointvec_jiffies,
2017                 .strategy       =       &sysctl_jiffies,
2018         },
2019         { .ctl_name = 0 }
2020 };
2021
2022 #endif
2023
2024 void __init ip6_route_init(void)
2025 {
2026         struct proc_dir_entry *p;
2027
2028         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2029                                                      sizeof(struct rt6_info),
2030                                                      0, SLAB_HWCACHE_ALIGN,
2031                                                      NULL, NULL);
2032         if (!ip6_dst_ops.kmem_cachep)
2033                 panic("cannot create ip6_dst_cache");
2034
2035         fib6_init();
2036 #ifdef  CONFIG_PROC_FS
2037         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2038         if (p)
2039                 p->owner = THIS_MODULE;
2040
2041         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2042 #endif
2043 #ifdef CONFIG_XFRM
2044         xfrm6_init();
2045 #endif
2046 }
2047
2048 void __exit ip6_route_cleanup(void)
2049 {
2050 #ifdef CONFIG_PROC_FS
2051         proc_net_remove("ipv6_route");
2052         proc_net_remove("rt6_stats");
2053 #endif
2054 #ifdef CONFIG_XFRM
2055         xfrm6_fini();
2056 #endif
2057         rt6_ifdown(NULL);
2058         fib6_gc_cleanup();
2059         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2060 }