This commit was manufactured by cvs2svn to create tag
[linux-2.6.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
40
41 #ifdef  CONFIG_PROC_FS
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #endif
45
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56
57 #include <asm/uaccess.h>
58
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62
/*
 * Debug verbosity for this file.  Set RT6_DEBUG to 3 to get tracing:
 * only then do RDBG() and RT6_TRACE() call printk(); at lower levels
 * they compile to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

73
74
75 static int ip6_rt_max_size = 4096;
76 static int ip6_rt_gc_min_interval = HZ / 2;
77 static int ip6_rt_gc_timeout = 60*HZ;
78 int ip6_rt_gc_interval = 30*HZ;
79 static int ip6_rt_gc_elasticity = 9;
80 static int ip6_rt_mtu_expires = 10*60*HZ;
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void             ip6_dst_destroy(struct dst_entry *);
87 static int               ip6_dst_gc(void);
88
89 static int              ip6_pkt_discard(struct sk_buff *skb);
90 static void             ip6_link_failure(struct sk_buff *skb);
91 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
92
93 static struct dst_ops ip6_dst_ops = {
94         .family                 =       AF_INET6,
95         .protocol               =       __constant_htons(ETH_P_IPV6),
96         .gc                     =       ip6_dst_gc,
97         .gc_thresh              =       1024,
98         .check                  =       ip6_dst_check,
99         .destroy                =       ip6_dst_destroy,
100         .negative_advice        =       ip6_negative_advice,
101         .link_failure           =       ip6_link_failure,
102         .update_pmtu            =       ip6_rt_update_pmtu,
103         .entry_size             =       sizeof(struct rt6_info),
104 };
105
106 struct rt6_info ip6_null_entry = {
107         .u = {
108                 .dst = {
109                         .__refcnt       = ATOMIC_INIT(1),
110                         .__use          = 1,
111                         .dev            = &loopback_dev,
112                         .obsolete       = -1,
113                         .error          = -ENETUNREACH,
114                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
115                         .input          = ip6_pkt_discard,
116                         .output         = ip6_pkt_discard,
117                         .ops            = &ip6_dst_ops,
118                         .path           = (struct dst_entry*)&ip6_null_entry,
119                 }
120         },
121         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
122         .rt6i_metric    = ~(u32) 0,
123         .rt6i_ref       = ATOMIC_INIT(1),
124 };
125
126 struct fib6_node ip6_routing_table = {
127         .leaf           = &ip6_null_entry,
128         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
129 };
130
131 /* Protects all the ip6 fib */
132
133 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
134
135
136 /* allocate dst with ip6_dst_ops */
137 static __inline__ struct rt6_info *ip6_dst_alloc(void)
138 {
139         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
140 }
141
142 static void ip6_dst_destroy(struct dst_entry *dst)
143 {
144         struct rt6_info *rt = (struct rt6_info *)dst;
145         if (rt->rt6i_idev != NULL)
146                 in6_dev_put(rt->rt6i_idev);
147         
148 }
149
150 /*
151  *      Route lookup. Any rt6_lock is implied.
152  */
153
154 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
155                                                     int oif,
156                                                     int strict)
157 {
158         struct rt6_info *local = NULL;
159         struct rt6_info *sprt;
160
161         if (oif) {
162                 for (sprt = rt; sprt; sprt = sprt->u.next) {
163                         struct net_device *dev = sprt->rt6i_dev;
164                         if (dev->ifindex == oif)
165                                 return sprt;
166                         if (dev->flags&IFF_LOOPBACK)
167                                 local = sprt;
168                 }
169
170                 if (local)
171                         return local;
172
173                 if (strict)
174                         return &ip6_null_entry;
175         }
176         return rt;
177 }
178
179 /*
180  *      pointer to the last default router chosen. BH is disabled locally.
181  */
182 static struct rt6_info *rt6_dflt_pointer;
183 static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
184
185 /* Default Router Selection (RFC 2461 6.3.6) */
186 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
187 {
188         struct rt6_info *match = NULL;
189         struct rt6_info *sprt;
190         int mpri = 0;
191
192         for (sprt = rt; sprt; sprt = sprt->u.next) {
193                 struct neighbour *neigh;
194                 int m = 0;
195
196                 if (!oif ||
197                     (sprt->rt6i_dev &&
198                      sprt->rt6i_dev->ifindex == oif))
199                         m += 8;
200
201                 if (sprt == rt6_dflt_pointer)
202                         m += 4;
203
204                 if ((neigh = sprt->rt6i_nexthop) != NULL) {
205                         read_lock_bh(&neigh->lock);
206                         switch (neigh->nud_state) {
207                         case NUD_REACHABLE:
208                                 m += 3;
209                                 break;
210
211                         case NUD_STALE:
212                         case NUD_DELAY:
213                         case NUD_PROBE:
214                                 m += 2;
215                                 break;
216
217                         case NUD_NOARP:
218                         case NUD_PERMANENT:
219                                 m += 1;
220                                 break;
221
222                         case NUD_INCOMPLETE:
223                         default:
224                                 read_unlock_bh(&neigh->lock);
225                                 continue;
226                         }
227                         read_unlock_bh(&neigh->lock);
228                 } else {
229                         continue;
230                 }
231
232                 if (m > mpri || m >= 12) {
233                         match = sprt;
234                         mpri = m;
235                         if (m >= 12) {
236                                 /* we choose the last default router if it
237                                  * is in (probably) reachable state.
238                                  * If route changed, we should do pmtu
239                                  * discovery. --yoshfuji
240                                  */
241                                 break;
242                         }
243                 }
244         }
245
246         spin_lock(&rt6_dflt_lock);
247         if (!match) {
248                 /*
249                  *      No default routers are known to be reachable.
250                  *      SHOULD round robin
251                  */
252                 if (rt6_dflt_pointer) {
253                         for (sprt = rt6_dflt_pointer->u.next;
254                              sprt; sprt = sprt->u.next) {
255                                 if (sprt->u.dst.obsolete <= 0 &&
256                                     sprt->u.dst.error == 0) {
257                                         match = sprt;
258                                         break;
259                                 }
260                         }
261                         for (sprt = rt;
262                              !match && sprt;
263                              sprt = sprt->u.next) {
264                                 if (sprt->u.dst.obsolete <= 0 &&
265                                     sprt->u.dst.error == 0) {
266                                         match = sprt;
267                                         break;
268                                 }
269                                 if (sprt == rt6_dflt_pointer)
270                                         break;
271                         }
272                 }
273         }
274
275         if (match) {
276                 if (rt6_dflt_pointer != match)
277                         RT6_TRACE("changed default router: %p->%p\n",
278                                   rt6_dflt_pointer, match);
279                 rt6_dflt_pointer = match;
280         }
281         spin_unlock(&rt6_dflt_lock);
282
283         if (!match) {
284                 /*
285                  * Last Resort: if no default routers found, 
286                  * use addrconf default route.
287                  * We don't record this route.
288                  */
289                 for (sprt = ip6_routing_table.leaf;
290                      sprt; sprt = sprt->u.next) {
291                         if ((sprt->rt6i_flags & RTF_DEFAULT) &&
292                             (!oif ||
293                              (sprt->rt6i_dev &&
294                               sprt->rt6i_dev->ifindex == oif))) {
295                                 match = sprt;
296                                 break;
297                         }
298                 }
299                 if (!match) {
300                         /* no default route.  give up. */
301                         match = &ip6_null_entry;
302                 }
303         }
304
305         return match;
306 }
307
308 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
309                             int oif, int strict)
310 {
311         struct fib6_node *fn;
312         struct rt6_info *rt;
313
314         read_lock_bh(&rt6_lock);
315         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
316         rt = rt6_device_match(fn->leaf, oif, strict);
317         dst_hold(&rt->u.dst);
318         rt->u.dst.__use++;
319         read_unlock_bh(&rt6_lock);
320
321         rt->u.dst.lastuse = jiffies;
322         if (rt->u.dst.error == 0)
323                 return rt;
324         dst_release(&rt->u.dst);
325         return NULL;
326 }
327
328 /* rt6_ins is called with FREE rt6_lock.
329    It takes new route entry, the addition fails by any reason the
330    route is freed. In any case, if caller does not hold it, it may
331    be destroyed.
332  */
333
334 static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
335 {
336         int err;
337
338         write_lock_bh(&rt6_lock);
339         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
340         write_unlock_bh(&rt6_lock);
341
342         return err;
343 }
344
345 /* No rt6_lock! If COW failed, the function returns dead route entry
346    with dst->error set to errno value.
347  */
348
349 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
350                                 struct in6_addr *saddr)
351 {
352         int err;
353         struct rt6_info *rt;
354
355         /*
356          *      Clone the route.
357          */
358
359         rt = ip6_rt_copy(ort);
360
361         if (rt) {
362                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
363
364                 if (!(rt->rt6i_flags&RTF_GATEWAY))
365                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
366
367                 rt->rt6i_dst.plen = 128;
368                 rt->rt6i_flags |= RTF_CACHE;
369                 rt->u.dst.flags |= DST_HOST;
370
371 #ifdef CONFIG_IPV6_SUBTREES
372                 if (rt->rt6i_src.plen && saddr) {
373                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
374                         rt->rt6i_src.plen = 128;
375                 }
376 #endif
377
378                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
379
380                 dst_hold(&rt->u.dst);
381
382                 err = rt6_ins(rt, NULL, NULL);
383                 if (err == 0)
384                         return rt;
385
386                 rt->u.dst.error = err;
387
388                 return rt;
389         }
390         dst_hold(&ip6_null_entry.u.dst);
391         return &ip6_null_entry;
392 }
393
/*
 * BACKTRACK() - after a strict device match has failed, retry the
 * lookup further up the tree.
 *
 * Not hygienic on purpose: it uses the locals `rt', `fn' and `strict'
 * and the labels `restart' and `out' of the function it is used in.
 *
 * When rt is ip6_null_entry and strict is set, walk fn up through its
 * parents.  Reaching a node flagged RTN_ROOT ends the search: a
 * reference is taken on rt and control jumps to `out'.  Reaching a
 * node flagged RTN_RTINFO jumps to `restart' with fn at that node.
 * If the parent chain runs out, execution simply continues.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is a single
 * statement and cannot capture a following `else'.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
405
406
407 void ip6_route_input(struct sk_buff *skb)
408 {
409         struct fib6_node *fn;
410         struct rt6_info *rt;
411         int strict;
412         int attempts = 3;
413
414         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
415
416 relookup:
417         read_lock_bh(&rt6_lock);
418
419         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
420                          &skb->nh.ipv6h->saddr);
421
422 restart:
423         rt = fn->leaf;
424
425         if ((rt->rt6i_flags & RTF_CACHE)) {
426                 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
427                 BACKTRACK();
428                 dst_hold(&rt->u.dst);
429                 goto out;
430         }
431
432         rt = rt6_device_match(rt, skb->dev->ifindex, 0);
433         BACKTRACK();
434
435         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
436                 read_unlock_bh(&rt6_lock);
437
438                 rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
439                              &skb->nh.ipv6h->saddr);
440                         
441                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
442                         goto out2;
443                 /* Race condition! In the gap, when rt6_lock was
444                    released someone could insert this route.  Relookup.
445                 */
446                 dst_release(&rt->u.dst);
447                 goto relookup;
448         }
449         dst_hold(&rt->u.dst);
450
451 out:
452         read_unlock_bh(&rt6_lock);
453 out2:
454         rt->u.dst.lastuse = jiffies;
455         rt->u.dst.__use++;
456         skb->dst = (struct dst_entry *) rt;
457 }
458
459 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
460 {
461         struct fib6_node *fn;
462         struct rt6_info *rt;
463         int strict;
464         int attempts = 3;
465
466         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
467
468 relookup:
469         read_lock_bh(&rt6_lock);
470
471         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
472
473 restart:
474         rt = fn->leaf;
475
476         if ((rt->rt6i_flags & RTF_CACHE)) {
477                 rt = rt6_device_match(rt, fl->oif, strict);
478                 BACKTRACK();
479                 dst_hold(&rt->u.dst);
480                 goto out;
481         }
482         if (rt->rt6i_flags & RTF_DEFAULT) {
483                 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
484                         rt = rt6_best_dflt(rt, fl->oif);
485         } else {
486                 rt = rt6_device_match(rt, fl->oif, strict);
487                 BACKTRACK();
488         }
489
490         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
491                 read_unlock_bh(&rt6_lock);
492
493                 rt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
494
495                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
496                         goto out2;
497
498                 /* Race condition! In the gap, when rt6_lock was
499                    released someone could insert this route.  Relookup.
500                 */
501                 dst_release(&rt->u.dst);
502                 goto relookup;
503         }
504         dst_hold(&rt->u.dst);
505
506 out:
507         read_unlock_bh(&rt6_lock);
508 out2:
509         rt->u.dst.lastuse = jiffies;
510         rt->u.dst.__use++;
511         return &rt->u.dst;
512 }
513
514
515 /*
516  *      Destination cache support functions
517  */
518
519 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
520 {
521         struct rt6_info *rt;
522
523         rt = (struct rt6_info *) dst;
524
525         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
526                 return dst;
527
528         dst_release(dst);
529         return NULL;
530 }
531
532 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
533 {
534         struct rt6_info *rt = (struct rt6_info *) dst;
535
536         if (rt) {
537                 if (rt->rt6i_flags & RTF_CACHE)
538                         ip6_del_rt(rt, NULL, NULL);
539                 else
540                         dst_release(dst);
541         }
542         return NULL;
543 }
544
545 static void ip6_link_failure(struct sk_buff *skb)
546 {
547         struct rt6_info *rt;
548
549         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
550
551         rt = (struct rt6_info *) skb->dst;
552         if (rt) {
553                 if (rt->rt6i_flags&RTF_CACHE) {
554                         dst_set_expires(&rt->u.dst, 0);
555                         rt->rt6i_flags |= RTF_EXPIRES;
556                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
557                         rt->rt6i_node->fn_sernum = -1;
558         }
559 }
560
561 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
562 {
563         struct rt6_info *rt6 = (struct rt6_info*)dst;
564
565         if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
566                 rt6->rt6i_flags |= RTF_MODIFIED;
567                 if (mtu < IPV6_MIN_MTU)
568                         mtu = IPV6_MIN_MTU;
569                 dst->metrics[RTAX_MTU-1] = mtu;
570         }
571 }
572
573 /* Protected by rt6_lock.  */
574 static struct dst_entry *ndisc_dst_gc_list;
575
576 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
577                                   struct neighbour *neigh,
578                                   struct in6_addr *addr,
579                                   int (*output)(struct sk_buff *))
580 {
581         struct rt6_info *rt = ip6_dst_alloc();
582
583         if (unlikely(rt == NULL))
584                 goto out;
585
586         dev_hold(dev);
587         if (neigh)
588                 neigh_hold(neigh);
589         else
590                 neigh = ndisc_get_neigh(dev, addr);
591
592         rt->rt6i_dev      = dev;
593         rt->rt6i_idev     = in6_dev_get(dev);
594         rt->rt6i_nexthop  = neigh;
595         rt->rt6i_expires  = 0;
596         rt->rt6i_flags    = RTF_LOCAL;
597         rt->rt6i_metric   = 0;
598         atomic_set(&rt->u.dst.__refcnt, 1);
599         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
600         rt->u.dst.output  = output;
601
602         write_lock_bh(&rt6_lock);
603         rt->u.dst.next = ndisc_dst_gc_list;
604         ndisc_dst_gc_list = &rt->u.dst;
605         write_unlock_bh(&rt6_lock);
606
607         fib6_force_start_gc();
608
609 out:
610         return (struct dst_entry *)rt;
611 }
612
613 int ndisc_dst_gc(int *more)
614 {
615         struct dst_entry *dst, *next, **pprev;
616         int freed;
617
618         next = NULL;
619         pprev = &ndisc_dst_gc_list;
620         freed = 0;
621         while ((dst = *pprev) != NULL) {
622                 if (!atomic_read(&dst->__refcnt)) {
623                         *pprev = dst->next;
624                         dst_free(dst);
625                         freed++;
626                 } else {
627                         pprev = &dst->next;
628                         (*more)++;
629                 }
630         }
631
632         return freed;
633 }
634
635 static int ip6_dst_gc(void)
636 {
637         static unsigned expire = 30*HZ;
638         static unsigned long last_gc;
639         unsigned long now = jiffies;
640
641         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
642             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
643                 goto out;
644
645         expire++;
646         fib6_run_gc(expire);
647         last_gc = now;
648         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
649                 expire = ip6_rt_gc_timeout>>1;
650
651 out:
652         expire -= expire>>ip6_rt_gc_elasticity;
653         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
654 }
655
656 /* Clean host part of a prefix. Not necessary in radix tree,
657    but results in cleaner routing tables.
658
659    Remove it only when all the things will work!
660  */
661
662 static int ipv6_get_mtu(struct net_device *dev)
663 {
664         int mtu = IPV6_MIN_MTU;
665         struct inet6_dev *idev;
666
667         idev = in6_dev_get(dev);
668         if (idev) {
669                 mtu = idev->cnf.mtu6;
670                 in6_dev_put(idev);
671         }
672         return mtu;
673 }
674
675 static inline unsigned int ipv6_advmss(unsigned int mtu)
676 {
677         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
678
679         if (mtu < ip6_rt_min_advmss)
680                 mtu = ip6_rt_min_advmss;
681
682         /*
683          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
684          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
685          * IPV6_MAXPLEN is also valid and means: "any MSS, 
686          * rely only on pmtu discovery"
687          */
688         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
689                 mtu = IPV6_MAXPLEN;
690         return mtu;
691 }
692
693 static int ipv6_get_hoplimit(struct net_device *dev)
694 {
695         int hoplimit = ipv6_devconf.hop_limit;
696         struct inet6_dev *idev;
697
698         idev = in6_dev_get(dev);
699         if (idev) {
700                 hoplimit = idev->cnf.hop_limit;
701                 in6_dev_put(idev);
702         }
703         return hoplimit;
704 }
705
706 /*
707  *
708  */
709
710 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
711 {
712         int err;
713         struct rtmsg *r;
714         struct rtattr **rta;
715         struct rt6_info *rt;
716         struct net_device *dev = NULL;
717         int addr_type;
718
719         rta = (struct rtattr **) _rtattr;
720
721         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
722                 return -EINVAL;
723 #ifndef CONFIG_IPV6_SUBTREES
724         if (rtmsg->rtmsg_src_len)
725                 return -EINVAL;
726 #endif
727         if (rtmsg->rtmsg_ifindex) {
728                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
729                 if (!dev)
730                         return -ENODEV;
731         }
732
733         if (rtmsg->rtmsg_metric == 0)
734                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
735
736         rt = ip6_dst_alloc();
737
738         if (rt == NULL)
739                 return -ENOMEM;
740
741         rt->u.dst.obsolete = -1;
742         rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
743         if (nlh && (r = NLMSG_DATA(nlh))) {
744                 rt->rt6i_protocol = r->rtm_protocol;
745         } else {
746                 rt->rt6i_protocol = RTPROT_BOOT;
747         }
748
749         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
750
751         if (addr_type & IPV6_ADDR_MULTICAST)
752                 rt->u.dst.input = ip6_mc_input;
753         else
754                 rt->u.dst.input = ip6_forward;
755
756         rt->u.dst.output = ip6_output;
757
758         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
759                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
760         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
761         if (rt->rt6i_dst.plen == 128)
762                rt->u.dst.flags = DST_HOST;
763
764 #ifdef CONFIG_IPV6_SUBTREES
765         ipv6_addr_prefix(&rt->rt6i_src.addr, 
766                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
767         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
768 #endif
769
770         rt->rt6i_metric = rtmsg->rtmsg_metric;
771
772         /* We cannot add true routes via loopback here,
773            they would result in kernel looping; promote them to reject routes
774          */
775         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
776             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
777                 if (dev)
778                         dev_put(dev);
779                 dev = &loopback_dev;
780                 dev_hold(dev);
781                 rt->u.dst.output = ip6_pkt_discard;
782                 rt->u.dst.input = ip6_pkt_discard;
783                 rt->u.dst.error = -ENETUNREACH;
784                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
785                 goto install_route;
786         }
787
788         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
789                 struct in6_addr *gw_addr;
790                 int gwa_type;
791
792                 gw_addr = &rtmsg->rtmsg_gateway;
793                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
794                 gwa_type = ipv6_addr_type(gw_addr);
795
796                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
797                         struct rt6_info *grt;
798
799                         /* IPv6 strictly inhibits using not link-local
800                            addresses as nexthop address.
801                            Otherwise, router will not able to send redirects.
802                            It is very good, but in some (rare!) circumstances
803                            (SIT, PtP, NBMA NOARP links) it is handy to allow
804                            some exceptions. --ANK
805                          */
806                         err = -EINVAL;
807                         if (!(gwa_type&IPV6_ADDR_UNICAST))
808                                 goto out;
809
810                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
811
812                         err = -EHOSTUNREACH;
813                         if (grt == NULL)
814                                 goto out;
815                         if (dev) {
816                                 if (dev != grt->rt6i_dev) {
817                                         dst_release(&grt->u.dst);
818                                         goto out;
819                                 }
820                         } else {
821                                 dev = grt->rt6i_dev;
822                                 dev_hold(dev);
823                         }
824                         if (!(grt->rt6i_flags&RTF_GATEWAY))
825                                 err = 0;
826                         dst_release(&grt->u.dst);
827
828                         if (err)
829                                 goto out;
830                 }
831                 err = -EINVAL;
832                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
833                         goto out;
834         }
835
836         err = -ENODEV;
837         if (dev == NULL)
838                 goto out;
839
840         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
841                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
842                 if (IS_ERR(rt->rt6i_nexthop)) {
843                         err = PTR_ERR(rt->rt6i_nexthop);
844                         rt->rt6i_nexthop = NULL;
845                         goto out;
846                 }
847         }
848
849         rt->rt6i_flags = rtmsg->rtmsg_flags;
850
851 install_route:
852         if (rta && rta[RTA_METRICS-1]) {
853                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
854                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
855
856                 while (RTA_OK(attr, attrlen)) {
857                         unsigned flavor = attr->rta_type;
858                         if (flavor) {
859                                 if (flavor > RTAX_MAX) {
860                                         err = -EINVAL;
861                                         goto out;
862                                 }
863                                 rt->u.dst.metrics[flavor-1] =
864                                         *(u32 *)RTA_DATA(attr);
865                         }
866                         attr = RTA_NEXT(attr, attrlen);
867                 }
868         }
869
870         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
871                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
872                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
873                                 IPV6_DEFAULT_MCASTHOPS;
874                 else
875                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
876                                 ipv6_get_hoplimit(dev);
877         }
878
879         if (!rt->u.dst.metrics[RTAX_MTU-1])
880                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
881         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
882                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
883         rt->u.dst.dev = dev;
884         rt->rt6i_idev = in6_dev_get(dev);
885         return rt6_ins(rt, nlh, _rtattr);
886
887 out:
888         if (dev)
889                 dev_put(dev);
890         dst_free((struct dst_entry *) rt);
891         return err;
892 }
893
894 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
895 {
896         int err;
897
898         write_lock_bh(&rt6_lock);
899
900         spin_lock_bh(&rt6_dflt_lock);
901         rt6_dflt_pointer = NULL;
902         spin_unlock_bh(&rt6_dflt_lock);
903
904         dst_release(&rt->u.dst);
905
906         err = fib6_del(rt, nlh, _rtattr);
907         write_unlock_bh(&rt6_lock);
908
909         return err;
910 }
911
912 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
913 {
914         struct fib6_node *fn;
915         struct rt6_info *rt;
916         int err = -ESRCH;
917
918         read_lock_bh(&rt6_lock);
919
920         fn = fib6_locate(&ip6_routing_table,
921                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
922                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
923         
924         if (fn) {
925                 for (rt = fn->leaf; rt; rt = rt->u.next) {
926                         if (rtmsg->rtmsg_ifindex &&
927                             (rt->rt6i_dev == NULL ||
928                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
929                                 continue;
930                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
931                             ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
932                                 continue;
933                         if (rtmsg->rtmsg_metric &&
934                             rtmsg->rtmsg_metric != rt->rt6i_metric)
935                                 continue;
936                         dst_hold(&rt->u.dst);
937                         read_unlock_bh(&rt6_lock);
938
939                         return ip6_del_rt(rt, nlh, _rtattr);
940                 }
941         }
942         read_unlock_bh(&rt6_lock);
943
944         return err;
945 }
946
947 /*
948  *      Handle redirects
949  */
/*
 *      Apply a redirect for destination 'dest'.
 *
 *      dest     - destination address the redirect applies to
 *      saddr    - address the redirect came from; it must equal the gateway
 *                 of the current route (or of some entry in the root leaf
 *                 list when the current route is RTF_DEFAULT)
 *      neigh    - neighbour entry for the new first hop; its primary_key
 *                 becomes the gateway of the new route
 *      on_link  - non-zero if the new route must not carry RTF_GATEWAY
 *
 *      When the redirect is accepted, a /128 RTF_DYNAMIC|RTF_CACHE copy of
 *      the current route is inserted, and the route that was looked up is
 *      deleted if it was itself an RTF_CACHE entry.  All other paths only
 *      drop the reference obtained from the lookup.
 */
void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
                  struct neighbour *neigh, int on_link)
{
        struct rt6_info *rt, *nrt;

        /* Locate old route to this destination. */
        rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);

        if (rt == NULL)
                return;

        /* The redirect must concern the device the route points at. */
        if (neigh->dev != rt->rt6i_dev)
                goto out;

        /* Redirect received -> path was valid.
           Look, redirects are sent only in response to data packets,
           so that this nexthop apparently is reachable. --ANK
         */
        dst_confirm(&rt->u.dst);

        /* Duplicate redirect: silently ignore. */
        if (neigh == rt->u.dst.neighbour)
                goto out;

        /* Current route is on-link; redirect is always invalid.
           
           Seems, previous statement is not true. It could
           be node, which looks for us as on-link (f.e. proxy ndisc)
           But then router serving it might decide, that we should
           know truth 8)8) --ANK (980726).
         */
        if (!(rt->rt6i_flags&RTF_GATEWAY))
                goto out;

        /*
         *      RFC 2461 specifies that redirects should only be
         *      accepted if they come from the nexthop to the target.
         *      Due to the way default routers are chosen, this notion
         *      is a bit fuzzy and one might need to check all default
         *      routers.
         */

        if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
                if (rt->rt6i_flags & RTF_DEFAULT) {
                        struct rt6_info *rt1;

                        /* Scan the root leaf list for an entry whose
                           gateway is the sender; if found, swap our
                           reference over to that entry and accept. */
                        read_lock(&rt6_lock);
                        for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
                                if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
                                        dst_hold(&rt1->u.dst);
                                        dst_release(&rt->u.dst);
                                        read_unlock(&rt6_lock);
                                        rt = rt1;
                                        goto source_ok;
                                }
                        }
                        read_unlock(&rt6_lock);
                }
                if (net_ratelimit())
                        printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
                               "for redirect target\n");
                goto out;
        }

source_ok:

        /*
         *      We have finally decided to accept it.
         */

        nrt = ip6_rt_copy(rt);
        if (nrt == NULL)
                goto out;

        /* Flags are set from scratch, not inherited from the copy. */
        nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
        if (on_link)
                nrt->rt6i_flags &= ~RTF_GATEWAY;

        /* The new entry is a host route for exactly 'dest'. */
        ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
        nrt->rt6i_dst.plen = 128;
        nrt->u.dst.flags |= DST_HOST;

        ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
        nrt->rt6i_nexthop = neigh_clone(neigh);
        /* Reset pmtu, it may be better */
        nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
        nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));

        if (rt6_ins(nrt, NULL, NULL))
                goto out;

        /* The old entry was itself a cache clone: delete it.
           ip6_del_rt() drops the reference we hold, so return
           without passing through 'out'. */
        if (rt->rt6i_flags&RTF_CACHE) {
                ip6_del_rt(rt, NULL, NULL);
                return;
        }

out:
        dst_release(&rt->u.dst);
        return;
}
1050
1051 /*
1052  *      Handle ICMP "packet too big" messages
1053  *      i.e. Path MTU discovery
1054  */
1055
/*
 *      Record a smaller path MTU for the path towards 'daddr'.
 *
 *      daddr  - destination whose path MTU is being reduced
 *      saddr  - source address used for the route lookup
 *      dev    - device whose ifindex restricts the route lookup
 *      pmtu   - reported MTU; values below IPV6_MIN_MTU are raised to
 *               IPV6_MIN_MTU
 *
 *      Nothing is changed unless pmtu is smaller than the MTU currently
 *      recorded on the route.  A cache route is updated in place; for any
 *      other route a host entry carrying the new MTU is created, and in
 *      both cases the entry expires after ip6_rt_mtu_expires.
 */
void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
                        struct net_device *dev, u32 pmtu)
{
        struct rt6_info *rt, *nrt;

        if (pmtu < IPV6_MIN_MTU) {
                if (net_ratelimit())
                        printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
                               pmtu);
                /* According to RFC1981, the PMTU is set to the IPv6 minimum
                   link MTU if the node receives a Packet Too Big message
                   reporting next-hop MTU that is less than the IPv6 minimum MTU.
                   */
                pmtu = IPV6_MIN_MTU;
        }

        rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);

        if (rt == NULL)
                return;

        /* Only decreases are handled here. */
        if (pmtu >= dst_pmtu(&rt->u.dst))
                goto out;

        /* New mtu received -> path was valid.
           They are sent only in response to data packets,
           so that this nexthop apparently is reachable. --ANK
         */
        dst_confirm(&rt->u.dst);

        /* Host route. If it is static, it would be better
           not to override it, but add new one, so that
           when cache entry will expire old pmtu
           would return automatically.
         */
        if (rt->rt6i_flags & RTF_CACHE) {
                rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
                dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
                rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
                goto out;
        }

        /* Network route.
           Two cases are possible:
           1. It is connected route. Action: COW
           2. It is gatewayed route or NONEXTHOP route. Action: clone it.
         */
        if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
                nrt = rt6_cow(rt, daddr, saddr);
                /* nrt is dereferenced without a NULL check, so rt6_cow()
                   is relied on to always return an entry; a failed one is
                   recognised by u.dst.error. */
                if (!nrt->u.dst.error) {
                        nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
                        /* According to RFC 1981, detecting PMTU increase shouldn't be
                           happened within 5 mins, the recommended timer is 10 mins.
                           Here this route expiration time is set to ip6_rt_mtu_expires
                           which is 10 mins. After 10 mins the decreased pmtu is expired
                           and detecting PMTU increase will be automatically happened.
                         */
                        dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
                        nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
                }
                dst_release(&nrt->u.dst);
        } else {
                nrt = ip6_rt_copy(rt);
                if (nrt == NULL)
                        goto out;
                /* Turn the copy into a /128 cache entry for daddr. */
                ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
                nrt->rt6i_dst.plen = 128;
                nrt->u.dst.flags |= DST_HOST;
                nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
                dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
                nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
                nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
                /* The result of the insertion is not checked. */
                rt6_ins(nrt, NULL, NULL);
        }

out:
        dst_release(&rt->u.dst);
}
1134
1135 /*
1136  *      Misc support functions
1137  */
1138
1139 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1140 {
1141         struct rt6_info *rt = ip6_dst_alloc();
1142
1143         if (rt) {
1144                 rt->u.dst.input = ort->u.dst.input;
1145                 rt->u.dst.output = ort->u.dst.output;
1146
1147                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1148                 rt->u.dst.dev = ort->u.dst.dev;
1149                 if (rt->u.dst.dev)
1150                         dev_hold(rt->u.dst.dev);
1151                 rt->rt6i_idev = ort->rt6i_idev;
1152                 if (rt->rt6i_idev)
1153                         in6_dev_hold(rt->rt6i_idev);
1154                 rt->u.dst.lastuse = jiffies;
1155                 rt->rt6i_expires = 0;
1156
1157                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1158                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1159                 rt->rt6i_metric = 0;
1160
1161                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1162 #ifdef CONFIG_IPV6_SUBTREES
1163                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1164 #endif
1165         }
1166         return rt;
1167 }
1168
1169 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1170 {       
1171         struct rt6_info *rt;
1172         struct fib6_node *fn;
1173
1174         fn = &ip6_routing_table;
1175
1176         write_lock_bh(&rt6_lock);
1177         for (rt = fn->leaf; rt; rt=rt->u.next) {
1178                 if (dev == rt->rt6i_dev &&
1179                     ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1180                         break;
1181         }
1182         if (rt)
1183                 dst_hold(&rt->u.dst);
1184         write_unlock_bh(&rt6_lock);
1185         return rt;
1186 }
1187
1188 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1189                                      struct net_device *dev)
1190 {
1191         struct in6_rtmsg rtmsg;
1192
1193         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1194         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1195         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1196         rtmsg.rtmsg_metric = 1024;
1197         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1198
1199         rtmsg.rtmsg_ifindex = dev->ifindex;
1200
1201         ip6_route_add(&rtmsg, NULL, NULL);
1202         return rt6_get_dflt_router(gwaddr, dev);
1203 }
1204
1205 void rt6_purge_dflt_routers(int last_resort)
1206 {
1207         struct rt6_info *rt;
1208         u32 flags;
1209
1210         if (last_resort)
1211                 flags = RTF_ALLONLINK;
1212         else
1213                 flags = RTF_DEFAULT | RTF_ADDRCONF;     
1214
1215 restart:
1216         read_lock_bh(&rt6_lock);
1217         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1218                 if (rt->rt6i_flags & flags) {
1219                         dst_hold(&rt->u.dst);
1220
1221                         spin_lock_bh(&rt6_dflt_lock);
1222                         rt6_dflt_pointer = NULL;
1223                         spin_unlock_bh(&rt6_dflt_lock);
1224
1225                         read_unlock_bh(&rt6_lock);
1226
1227                         ip6_del_rt(rt, NULL, NULL);
1228
1229                         goto restart;
1230                 }
1231         }
1232         read_unlock_bh(&rt6_lock);
1233 }
1234
1235 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1236 {
1237         struct in6_rtmsg rtmsg;
1238         int err;
1239
1240         switch(cmd) {
1241         case SIOCADDRT:         /* Add a route */
1242         case SIOCDELRT:         /* Delete a route */
1243                 if (!capable(CAP_NET_ADMIN))
1244                         return -EPERM;
1245                 err = copy_from_user(&rtmsg, arg,
1246                                      sizeof(struct in6_rtmsg));
1247                 if (err)
1248                         return -EFAULT;
1249                         
1250                 rtnl_lock();
1251                 switch (cmd) {
1252                 case SIOCADDRT:
1253                         err = ip6_route_add(&rtmsg, NULL, NULL);
1254                         break;
1255                 case SIOCDELRT:
1256                         err = ip6_route_del(&rtmsg, NULL, NULL);
1257                         break;
1258                 default:
1259                         err = -EINVAL;
1260                 }
1261                 rtnl_unlock();
1262
1263                 return err;
1264         };
1265
1266         return -EINVAL;
1267 }
1268
1269 /*
1270  *      Drop the packet on the floor
1271  */
1272
/*
 *      Discard a packet that has no usable route: bump the OutNoRoutes
 *      counter, send an ICMPv6 destination-unreachable / no-route error
 *      for it, and free the skb.  Always returns 0.
 */
int ip6_pkt_discard(struct sk_buff *skb)
{
        IP6_INC_STATS(OutNoRoutes);
        icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
        kfree_skb(skb);
        return 0;
}
1280
1281 /*
1282  *      Add address
1283  */
1284
1285 int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast)
1286 {
1287         struct rt6_info *rt = ip6_dst_alloc();
1288
1289         if (rt == NULL)
1290                 return -ENOMEM;
1291
1292         dev_hold(&loopback_dev);
1293
1294         rt->u.dst.flags = DST_HOST;
1295         rt->u.dst.input = ip6_input;
1296         rt->u.dst.output = ip6_output;
1297         rt->rt6i_dev = &loopback_dev;
1298         rt->rt6i_idev = in6_dev_get(&loopback_dev);
1299         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1300         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
1301         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
1302         rt->u.dst.obsolete = -1;
1303
1304         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1305         if (!anycast)
1306                 rt->rt6i_flags |= RTF_LOCAL;
1307         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1308         if (rt->rt6i_nexthop == NULL) {
1309                 dst_free((struct dst_entry *) rt);
1310                 return -ENOMEM;
1311         }
1312
1313         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1314         rt->rt6i_dst.plen = 128;
1315         rt6_ins(rt, NULL, NULL);
1316
1317         return 0;
1318 }
1319
1320 /* Delete address. Warning: you should check that this address
1321    disappeared before calling this function.
1322  */
1323
1324 int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
1325 {
1326         struct rt6_info *rt;
1327         int err = -ENOENT;
1328
1329         rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
1330         if (rt) {
1331                 if (rt->rt6i_dst.plen == 128)
1332                         err = ip6_del_rt(rt, NULL, NULL);
1333                 else
1334                         dst_release(&rt->u.dst);
1335         }
1336
1337         return err;
1338 }
1339
1340 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1341 {
1342         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1343             rt != &ip6_null_entry) {
1344                 RT6_TRACE("deleted by ifdown %p\n", rt);
1345                 return -1;
1346         }
1347         return 0;
1348 }
1349
/*
 *      Walk the whole routing table under the write lock with the
 *      fib6_ifdown() callback, passing 'dev' as its argument.  The callback
 *      returns -1 for routes on that device (all routes when dev is NULL);
 *      judging by its trace message those routes are then deleted by
 *      fib6_clean_tree() -- confirm against ip6_fib.c.
 */
void rt6_ifdown(struct net_device *dev)
{
        write_lock_bh(&rt6_lock);
        fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
        write_unlock_bh(&rt6_lock);
}
1356
/* Argument bundle handed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
        struct net_device *dev;         /* device whose MTU changed */
        unsigned mtu;                   /* the new MTU value */
};
1362
1363 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1364 {
1365         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1366         struct inet6_dev *idev;
1367
1368         /* In IPv6 pmtu discovery is not optional,
1369            so that RTAX_MTU lock cannot disable it.
1370            We still use this lock to block changes
1371            caused by addrconf/ndisc.
1372         */
1373
1374         idev = __in6_dev_get(arg->dev);
1375         if (idev == NULL)
1376                 return 0;
1377
1378         /* For administrative MTU increase, there is no way to discover
1379            IPv6 PMTU increase, so PMTU increase should be updated here.
1380            Since RFC 1981 doesn't include administrative MTU increase
1381            update PMTU increase is a MUST. (i.e. jumbo frame)
1382          */
1383         /*
1384            If new MTU is less than route PMTU, this new MTU will be the
1385            lowest MTU in the path, update the route PMTU to reflect PMTU
1386            decreases; if new MTU is greater than route PMTU, and the
1387            old MTU is the lowest MTU in the path, update the route PMTU
1388            to reflect the increase. In this case if the other nodes' MTU
1389            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1390            PMTU discouvery.
1391          */
1392         if (rt->rt6i_dev == arg->dev &&
1393             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1394             (dst_pmtu(&rt->u.dst) > arg->mtu ||
1395              (dst_pmtu(&rt->u.dst) < arg->mtu &&
1396               dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
1397                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1398         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1399         return 0;
1400 }
1401
1402 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1403 {
1404         struct rt6_mtu_change_arg arg;
1405
1406         arg.dev = dev;
1407         arg.mtu = mtu;
1408         read_lock_bh(&rt6_lock);
1409         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1410         read_unlock_bh(&rt6_lock);
1411 }
1412
1413 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1414                               struct in6_rtmsg *rtmsg)
1415 {
1416         memset(rtmsg, 0, sizeof(*rtmsg));
1417
1418         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1419         rtmsg->rtmsg_src_len = r->rtm_src_len;
1420         rtmsg->rtmsg_flags = RTF_UP;
1421         if (r->rtm_type == RTN_UNREACHABLE)
1422                 rtmsg->rtmsg_flags |= RTF_REJECT;
1423
1424         if (rta[RTA_GATEWAY-1]) {
1425                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1426                         return -EINVAL;
1427                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1428                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1429         }
1430         if (rta[RTA_DST-1]) {
1431                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1432                         return -EINVAL;
1433                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1434         }
1435         if (rta[RTA_SRC-1]) {
1436                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1437                         return -EINVAL;
1438                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1439         }
1440         if (rta[RTA_OIF-1]) {
1441                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1442                         return -EINVAL;
1443                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1444         }
1445         if (rta[RTA_PRIORITY-1]) {
1446                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1447                         return -EINVAL;
1448                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1449         }
1450         return 0;
1451 }
1452
1453 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1454 {
1455         struct rtmsg *r = NLMSG_DATA(nlh);
1456         struct in6_rtmsg rtmsg;
1457
1458         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1459                 return -EINVAL;
1460         return ip6_route_del(&rtmsg, nlh, arg);
1461 }
1462
1463 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1464 {
1465         struct rtmsg *r = NLMSG_DATA(nlh);
1466         struct in6_rtmsg rtmsg;
1467
1468         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1469                 return -EINVAL;
1470         return ip6_route_add(&rtmsg, nlh, arg);
1471 }
1472
/* Per-call state that inet6_dump_fib() passes to the walker callbacks. */
struct rt6_rtnl_dump_arg
{
        struct sk_buff *skb;            /* buffer the routes are dumped into */
        struct netlink_callback *cb;    /* netlink dump callback of the request */
};
1478
/*
 *      Append one rtnetlink route message describing 'rt' to 'skb'.
 *
 *      skb     - buffer to append to
 *      rt      - route to describe
 *      dst     - if non-NULL, reported as the destination with prefix
 *                length 128 instead of the route's own prefix
 *      src     - same for the source (only with CONFIG_IPV6_SUBTREES)
 *      iif     - if non-zero, reported as RTA_IIF; otherwise, when dst is
 *                given, a preferred source address is looked up and
 *                reported as RTA_PREFSRC
 *      type    - netlink message type
 *      pid     - netlink pid for the header; when zero and in_nlh is
 *                given, in_nlh->nlmsg_pid is used instead
 *      seq     - netlink sequence number for the header
 *      in_nlh  - request header, may be NULL
 *      prefix  - non-zero: only routes flagged RTF_PREFIX_RT are emitted
 *
 *      Returns 1 when the route is skipped by the prefix filter, skb->len
 *      after a message was appended, and -1 (with the skb trimmed back to
 *      its original length) on failure.
 *
 *      NOTE(review): NLMSG_PUT and RTA_PUT are expected to jump to
 *      nlmsg_failure / rtattr_failure when the skb is full -- confirm
 *      against the macro definitions.
 */
static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
                         struct in6_addr *dst,
                         struct in6_addr *src,
                         int iif,
                         int type, u32 pid, u32 seq,
                         struct nlmsghdr *in_nlh, int prefix)
{
        struct rtmsg *rtm;
        struct nlmsghdr  *nlh;
        unsigned char    *b = skb->tail;        /* start of this message */
        struct rta_cacheinfo ci;

        if (prefix) {   /* user wants prefix routes only */
                if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
                        /* success since this is not a prefix route */
                        return 1;
                }
        }

        if (!pid && in_nlh) {
                pid = in_nlh->nlmsg_pid;
        }

        nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
        rtm = NLMSG_DATA(nlh);
        rtm->rtm_family = AF_INET6;
        rtm->rtm_dst_len = rt->rt6i_dst.plen;
        rtm->rtm_src_len = rt->rt6i_src.plen;
        rtm->rtm_tos = 0;
        rtm->rtm_table = RT_TABLE_MAIN;
        if (rt->rt6i_flags&RTF_REJECT)
                rtm->rtm_type = RTN_UNREACHABLE;
        else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
                rtm->rtm_type = RTN_LOCAL;
        else
                rtm->rtm_type = RTN_UNICAST;
        rtm->rtm_flags = 0;
        rtm->rtm_scope = RT_SCOPE_UNIVERSE;
        /* Start from the stored protocol, then override it from flags. */
        rtm->rtm_protocol = rt->rt6i_protocol;
        if (rt->rt6i_flags&RTF_DYNAMIC)
                rtm->rtm_protocol = RTPROT_REDIRECT;
        else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
                rtm->rtm_protocol = RTPROT_KERNEL;
        else if (rt->rt6i_flags&RTF_DEFAULT)
                rtm->rtm_protocol = RTPROT_RA;

        if (rt->rt6i_flags&RTF_CACHE)
                rtm->rtm_flags |= RTM_F_CLONED;

        if (dst) {
                RTA_PUT(skb, RTA_DST, 16, dst);
                rtm->rtm_dst_len = 128;
        } else if (rtm->rtm_dst_len)
                RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
        if (src) {
                RTA_PUT(skb, RTA_SRC, 16, src);
                rtm->rtm_src_len = 128;
        } else if (rtm->rtm_src_len)
                RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
        if (iif)
                RTA_PUT(skb, RTA_IIF, 4, &iif);
        else if (dst) {
                struct in6_addr saddr_buf;
                if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
                        RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
        }
        if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
                goto rtattr_failure;
        /* The gateway is reported from the bound neighbour entry. */
        if (rt->u.dst.neighbour)
                RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
        if (rt->u.dst.dev)
                RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
        RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
        ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
        if (rt->rt6i_expires)
                ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
        else
                ci.rta_expires = 0;
        ci.rta_used = rt->u.dst.__use;
        ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
        ci.rta_error = rt->u.dst.error;
        ci.rta_id = 0;
        ci.rta_ts = 0;
        ci.rta_tsage = 0;
        RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
        /* Patch the final length now that all attributes are in place. */
        nlh->nlmsg_len = skb->tail - b;
        return skb->len;

nlmsg_failure:
rtattr_failure:
        /* Undo the partially written message. */
        skb_trim(skb, b - skb->data);
        return -1;
}
1574
1575 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1576 {
1577         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1578         int prefix;
1579
1580         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1581                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1582                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1583         } else
1584                 prefix = 0;
1585
1586         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1587                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1588                      NULL, prefix);
1589 }
1590
1591 static int fib6_dump_node(struct fib6_walker_t *w)
1592 {
1593         int res;
1594         struct rt6_info *rt;
1595
1596         for (rt = w->leaf; rt; rt = rt->u.next) {
1597                 res = rt6_dump_route(rt, w->args);
1598                 if (res < 0) {
1599                         /* Frame is full, suspend walking */
1600                         w->leaf = rt;
1601                         return 1;
1602                 }
1603                 BUG_TRAP(res!=0);
1604         }
1605         w->leaf = NULL;
1606         return 0;
1607 }
1608
1609 static void fib6_dump_end(struct netlink_callback *cb)
1610 {
1611         struct fib6_walker_t *w = (void*)cb->args[0];
1612
1613         if (w) {
1614                 cb->args[0] = 0;
1615                 fib6_walker_unlink(w);
1616                 kfree(w);
1617         }
1618         if (cb->args[1]) {
1619                 cb->done = (void*)cb->args[1];
1620                 cb->args[1] = 0;
1621         }
1622 }
1623
1624 static int fib6_dump_done(struct netlink_callback *cb)
1625 {
1626         fib6_dump_end(cb);
1627         return cb->done(cb);
1628 }
1629
/*
 *      rtnetlink dump handler for the IPv6 routing table.
 *
 *      A dump can span several calls.  The tree walker that remembers the
 *      position is kept in cb->args[0]; the caller's original 'done'
 *      callback is parked in cb->args[1] while fib6_dump_done is installed
 *      in its place so the walker is released when the dump ends.
 *
 *      Returns skb->len (which may be 0 once everything has been dumped),
 *      -ENOMEM if the walker cannot be allocated, or a negative result
 *      from the walk.
 */
int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct rt6_rtnl_dump_arg arg;
        struct fib6_walker_t *w;
        int res;

        arg.skb = skb;
        arg.cb = cb;

        w = (void*)cb->args[0];
        if (w == NULL) {
                /* New dump:
                 * 
                 * 1. hook callback destructor.
                 */
                cb->args[1] = (long)cb->done;
                cb->done = fib6_dump_done;

                /*
                 * 2. allocate and initialize walker.
                 */
                w = kmalloc(sizeof(*w), GFP_ATOMIC);
                if (w == NULL)
                        return -ENOMEM;
                RT6_TRACE("dump<%p", w);
                memset(w, 0, sizeof(*w));
                w->root = &ip6_routing_table;
                w->func = fib6_dump_node;
                /* 'arg' lives on this stack frame; the pointer is
                   refreshed on every continuation below. */
                w->args = &arg;
                cb->args[0] = (long)w;
                read_lock_bh(&rt6_lock);
                res = fib6_walk(w);
                read_unlock_bh(&rt6_lock);
        } else {
                /* Continuation: resume the saved walker with the new skb. */
                w->args = &arg;
                read_lock_bh(&rt6_lock);
                res = fib6_walk_continue(w);
                read_unlock_bh(&rt6_lock);
        }
#if RT6_DEBUG >= 3
        if (res <= 0 && skb->len == 0)
                RT6_TRACE("%p>dump end\n", w);
#endif
        res = res < 0 ? res : skb->len;
        /* res < 0 is an error. (really, impossible)
           res == 0 means that dump is complete, but skb still can contain data.
           res > 0 dump is not complete, but frame is full.
         */
        /* Destroy walker, if dump of this table is complete. */
        if (res <= 0)
                fib6_dump_end(cb);
        return res;
}
1683
1684 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1685 {
1686         struct rtattr **rta = arg;
1687         int iif = 0;
1688         int err = -ENOBUFS;
1689         struct sk_buff *skb;
1690         struct flowi fl;
1691         struct rt6_info *rt;
1692
1693         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1694         if (skb == NULL)
1695                 goto out;
1696
1697         /* Reserve room for dummy headers, this skb can pass
1698            through good chunk of routing engine.
1699          */
1700         skb->mac.raw = skb->data;
1701         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1702
1703         memset(&fl, 0, sizeof(fl));
1704         if (rta[RTA_SRC-1])
1705                 ipv6_addr_copy(&fl.fl6_src,
1706                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1707         if (rta[RTA_DST-1])
1708                 ipv6_addr_copy(&fl.fl6_dst,
1709                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1710
1711         if (rta[RTA_IIF-1])
1712                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1713
1714         if (iif) {
1715                 struct net_device *dev;
1716                 dev = __dev_get_by_index(iif);
1717                 if (!dev) {
1718                         err = -ENODEV;
1719                         goto out_free;
1720                 }
1721         }
1722
1723         fl.oif = 0;
1724         if (rta[RTA_OIF-1])
1725                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1726
1727         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1728
1729         skb->dst = &rt->u.dst;
1730
1731         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1732         err = rt6_fill_node(skb, rt, 
1733                             &fl.fl6_dst, &fl.fl6_src,
1734                             iif,
1735                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1736                             nlh->nlmsg_seq, nlh, 0);
1737         if (err < 0) {
1738                 err = -EMSGSIZE;
1739                 goto out_free;
1740         }
1741
1742         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1743         if (err > 0)
1744                 err = 0;
1745 out:
1746         return err;
1747 out_free:
1748         kfree_skb(skb);
1749         goto out;       
1750 }
1751
1752 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1753 {
1754         struct sk_buff *skb;
1755         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1756
1757         skb = alloc_skb(size, gfp_any());
1758         if (!skb) {
1759                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1760                 return;
1761         }
1762         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1763                 kfree_skb(skb);
1764                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1765                 return;
1766         }
1767         NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1768         netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1769 }
1770
1771 /*
1772  *      /proc
1773  */
1774
1775 #ifdef CONFIG_PROC_FS
1776
/*
 * Width in bytes that rt6_info_route() and rt6_proc_info() assume for one
 * formatted route line when converting a read offset into an entry index.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor shared between rt6_proc_info() and the rt6_info_route() callback. */
struct rt6_proc_arg {
	char *buffer;	/* output buffer the route lines are written to */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes the caller asked for */
	int skip;	/* routes skipped so far to reach the offset */
	int len;	/* bytes written to buffer so far */
};
1787
1788 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1789 {
1790         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1791         int i;
1792
1793         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1794                 arg->skip++;
1795                 return 0;
1796         }
1797
1798         if (arg->len >= arg->length)
1799                 return 0;
1800
1801         for (i=0; i<16; i++) {
1802                 sprintf(arg->buffer + arg->len, "%02x",
1803                         rt->rt6i_dst.addr.s6_addr[i]);
1804                 arg->len += 2;
1805         }
1806         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1807                             rt->rt6i_dst.plen);
1808
1809 #ifdef CONFIG_IPV6_SUBTREES
1810         for (i=0; i<16; i++) {
1811                 sprintf(arg->buffer + arg->len, "%02x",
1812                         rt->rt6i_src.addr.s6_addr[i]);
1813                 arg->len += 2;
1814         }
1815         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1816                             rt->rt6i_src.plen);
1817 #else
1818         sprintf(arg->buffer + arg->len,
1819                 "00000000000000000000000000000000 00 ");
1820         arg->len += 36;
1821 #endif
1822
1823         if (rt->rt6i_nexthop) {
1824                 for (i=0; i<16; i++) {
1825                         sprintf(arg->buffer + arg->len, "%02x",
1826                                 rt->rt6i_nexthop->primary_key[i]);
1827                         arg->len += 2;
1828                 }
1829         } else {
1830                 sprintf(arg->buffer + arg->len,
1831                         "00000000000000000000000000000000");
1832                 arg->len += 32;
1833         }
1834         arg->len += sprintf(arg->buffer + arg->len,
1835                             " %08x %08x %08x %08x %8s\n",
1836                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1837                             rt->u.dst.__use, rt->rt6i_flags, 
1838                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1839         return 0;
1840 }
1841
1842 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1843 {
1844         struct rt6_proc_arg arg;
1845         arg.buffer = buffer;
1846         arg.offset = offset;
1847         arg.length = length;
1848         arg.skip = 0;
1849         arg.len = 0;
1850
1851         read_lock_bh(&rt6_lock);
1852         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1853         read_unlock_bh(&rt6_lock);
1854
1855         *start = buffer;
1856         if (offset)
1857                 *start += offset % RT6_INFO_LEN;
1858
1859         arg.len -= offset % RT6_INFO_LEN;
1860
1861         if (arg.len > length)
1862                 arg.len = length;
1863         if (arg.len < 0)
1864                 arg.len = 0;
1865
1866         return arg.len;
1867 }
1868
1869 extern struct rt6_statistics rt6_stats;
1870
1871 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1872 {
1873         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1874                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1875                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1876                       rt6_stats.fib_rt_cache,
1877                       atomic_read(&ip6_dst_ops.entries),
1878                       rt6_stats.fib_discarded_routes);
1879
1880         return 0;
1881 }
1882
1883 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1884 {
1885         return single_open(file, rt6_stats_seq_show, NULL);
1886 }
1887
1888 static struct file_operations rt6_stats_seq_fops = {
1889         .owner   = THIS_MODULE,
1890         .open    = rt6_stats_seq_open,
1891         .read    = seq_read,
1892         .llseek  = seq_lseek,
1893         .release = single_release,
1894 };
1895 #endif  /* CONFIG_PROC_FS */
1896
1897 #ifdef CONFIG_SYSCTL
1898
1899 static int flush_delay;
1900
1901 static
1902 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1903                               void __user *buffer, size_t *lenp)
1904 {
1905         if (write) {
1906                 proc_dointvec(ctl, write, filp, buffer, lenp);
1907                 if (flush_delay < 0)
1908                         flush_delay = 0;
1909                 fib6_run_gc((unsigned long)flush_delay);
1910                 return 0;
1911         } else
1912                 return -EINVAL;
1913 }
1914
1915 ctl_table ipv6_route_table[] = {
1916         {
1917                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
1918                 .procname       =       "flush",
1919                 .data           =       &flush_delay,
1920                 .maxlen         =       sizeof(int),
1921                 .mode           =       0644,
1922                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
1923         },
1924         {
1925                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
1926                 .procname       =       "gc_thresh",
1927                 .data           =       &ip6_dst_ops.gc_thresh,
1928                 .maxlen         =       sizeof(int),
1929                 .mode           =       0644,
1930                 .proc_handler   =       &proc_dointvec,
1931         },
1932         {
1933                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
1934                 .procname       =       "max_size",
1935                 .data           =       &ip6_rt_max_size,
1936                 .maxlen         =       sizeof(int),
1937                 .mode           =       0644,
1938                 .proc_handler   =       &proc_dointvec,
1939         },
1940         {
1941                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
1942                 .procname       =       "gc_min_interval",
1943                 .data           =       &ip6_rt_gc_min_interval,
1944                 .maxlen         =       sizeof(int),
1945                 .mode           =       0644,
1946                 .proc_handler   =       &proc_dointvec_jiffies,
1947                 .strategy       =       &sysctl_jiffies,
1948         },
1949         {
1950                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
1951                 .procname       =       "gc_timeout",
1952                 .data           =       &ip6_rt_gc_timeout,
1953                 .maxlen         =       sizeof(int),
1954                 .mode           =       0644,
1955                 .proc_handler   =       &proc_dointvec_jiffies,
1956                 .strategy       =       &sysctl_jiffies,
1957         },
1958         {
1959                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
1960                 .procname       =       "gc_interval",
1961                 .data           =       &ip6_rt_gc_interval,
1962                 .maxlen         =       sizeof(int),
1963                 .mode           =       0644,
1964                 .proc_handler   =       &proc_dointvec_jiffies,
1965                 .strategy       =       &sysctl_jiffies,
1966         },
1967         {
1968                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
1969                 .procname       =       "gc_elasticity",
1970                 .data           =       &ip6_rt_gc_elasticity,
1971                 .maxlen         =       sizeof(int),
1972                 .mode           =       0644,
1973                 .proc_handler   =       &proc_dointvec_jiffies,
1974                 .strategy       =       &sysctl_jiffies,
1975         },
1976         {
1977                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
1978                 .procname       =       "mtu_expires",
1979                 .data           =       &ip6_rt_mtu_expires,
1980                 .maxlen         =       sizeof(int),
1981                 .mode           =       0644,
1982                 .proc_handler   =       &proc_dointvec_jiffies,
1983                 .strategy       =       &sysctl_jiffies,
1984         },
1985         {
1986                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
1987                 .procname       =       "min_adv_mss",
1988                 .data           =       &ip6_rt_min_advmss,
1989                 .maxlen         =       sizeof(int),
1990                 .mode           =       0644,
1991                 .proc_handler   =       &proc_dointvec_jiffies,
1992                 .strategy       =       &sysctl_jiffies,
1993         },
1994         { .ctl_name = 0 }
1995 };
1996
1997 #endif
1998
1999 void __init ip6_route_init(void)
2000 {
2001         struct proc_dir_entry *p;
2002
2003         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2004                                                      sizeof(struct rt6_info),
2005                                                      0, SLAB_HWCACHE_ALIGN,
2006                                                      NULL, NULL);
2007         if (!ip6_dst_ops.kmem_cachep)
2008                 panic("cannot create ip6_dst_cache");
2009
2010         fib6_init();
2011 #ifdef  CONFIG_PROC_FS
2012         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2013         if (p)
2014                 p->owner = THIS_MODULE;
2015
2016         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2017 #endif
2018 #ifdef CONFIG_XFRM
2019         xfrm6_init();
2020 #endif
2021 }
2022
2023 void __exit ip6_route_cleanup(void)
2024 {
2025 #ifdef CONFIG_PROC_FS
2026         proc_net_remove("ipv6_route");
2027         proc_net_remove("rt6_stats");
2028 #endif
2029 #ifdef CONFIG_XFRM
2030         xfrm6_fini();
2031 #endif
2032         rt6_ifdown(NULL);
2033         fib6_gc_cleanup();
2034         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2035 }