ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
40
41 #ifdef  CONFIG_PROC_FS
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #endif
45
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56
57 #include <asm/uaccess.h>
58
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62
/*
 * Debug verbosity for this file.  With RT6_DEBUG below 3 both tracing
 * macros compile away; set it to 3 (or more) to send them to printk.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled: RDBG vanishes, RT6_TRACE is an empty statement. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
/* RDBG takes a complete, parenthesised printk argument list. */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
73
74
75 static int ip6_rt_max_size = 4096;
76 static int ip6_rt_gc_min_interval = HZ / 2;
77 static int ip6_rt_gc_timeout = 60*HZ;
78 int ip6_rt_gc_interval = 30*HZ;
79 static int ip6_rt_gc_elasticity = 9;
80 static int ip6_rt_mtu_expires = 10*60*HZ;
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static int               ip6_dst_gc(void);
87
88 static int              ip6_pkt_discard(struct sk_buff *skb);
89 static void             ip6_link_failure(struct sk_buff *skb);
90 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
91
92 static struct dst_ops ip6_dst_ops = {
93         .family                 =       AF_INET6,
94         .protocol               =       __constant_htons(ETH_P_IPV6),
95         .gc                     =       ip6_dst_gc,
96         .gc_thresh              =       1024,
97         .check                  =       ip6_dst_check,
98         .negative_advice        =       ip6_negative_advice,
99         .link_failure           =       ip6_link_failure,
100         .update_pmtu            =       ip6_rt_update_pmtu,
101         .entry_size             =       sizeof(struct rt6_info),
102 };
103
104 struct rt6_info ip6_null_entry = {
105         .u = {
106                 .dst = {
107                         .__refcnt       = ATOMIC_INIT(1),
108                         .__use          = 1,
109                         .dev            = &loopback_dev,
110                         .obsolete       = -1,
111                         .error          = -ENETUNREACH,
112                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
113                         .input          = ip6_pkt_discard,
114                         .output         = ip6_pkt_discard,
115                         .ops            = &ip6_dst_ops,
116                         .path           = (struct dst_entry*)&ip6_null_entry,
117                 }
118         },
119         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
120         .rt6i_metric    = ~(u32) 0,
121         .rt6i_ref       = ATOMIC_INIT(1),
122 };
123
124 struct fib6_node ip6_routing_table = {
125         .leaf           = &ip6_null_entry,
126         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
127 };
128
129 /* Protects all the ip6 fib */
130
131 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
132
133
134 /* allocate dst with ip6_dst_ops */
135 static __inline__ struct rt6_info *ip6_dst_alloc(void)
136 {
137         return dst_alloc(&ip6_dst_ops);
138 }
139
140 /*
141  *      Route lookup. Any rt6_lock is implied.
142  */
143
144 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
145                                                     int oif,
146                                                     int strict)
147 {
148         struct rt6_info *local = NULL;
149         struct rt6_info *sprt;
150
151         if (oif) {
152                 for (sprt = rt; sprt; sprt = sprt->u.next) {
153                         struct net_device *dev = sprt->rt6i_dev;
154                         if (dev->ifindex == oif)
155                                 return sprt;
156                         if (dev->flags&IFF_LOOPBACK)
157                                 local = sprt;
158                 }
159
160                 if (local)
161                         return local;
162
163                 if (strict)
164                         return &ip6_null_entry;
165         }
166         return rt;
167 }
168
169 /*
170  *      pointer to the last default router chosen. BH is disabled locally.
171  */
172 static struct rt6_info *rt6_dflt_pointer;
173 static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
174
175 /* Default Router Selection (RFC 2461 6.3.6) */
176 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
177 {
178         struct rt6_info *match = NULL;
179         struct rt6_info *sprt;
180         int mpri = 0;
181
182         for (sprt = rt; sprt; sprt = sprt->u.next) {
183                 struct neighbour *neigh;
184                 int m = 0;
185
186                 if (!oif ||
187                     (sprt->rt6i_dev &&
188                      sprt->rt6i_dev->ifindex == oif))
189                         m += 8;
190
191                 if (sprt == rt6_dflt_pointer)
192                         m += 4;
193
194                 if ((neigh = sprt->rt6i_nexthop) != NULL) {
195                         read_lock_bh(&neigh->lock);
196                         switch (neigh->nud_state) {
197                         case NUD_REACHABLE:
198                                 m += 3;
199                                 break;
200
201                         case NUD_STALE:
202                         case NUD_DELAY:
203                         case NUD_PROBE:
204                                 m += 2;
205                                 break;
206
207                         case NUD_NOARP:
208                         case NUD_PERMANENT:
209                                 m += 1;
210                                 break;
211
212                         case NUD_INCOMPLETE:
213                         default:
214                                 read_unlock_bh(&neigh->lock);
215                                 continue;
216                         }
217                         read_unlock_bh(&neigh->lock);
218                 } else {
219                         continue;
220                 }
221
222                 if (m > mpri || m >= 12) {
223                         match = sprt;
224                         mpri = m;
225                         if (m >= 12) {
226                                 /* we choose the last default router if it
227                                  * is in (probably) reachable state.
228                                  * If route changed, we should do pmtu
229                                  * discovery. --yoshfuji
230                                  */
231                                 break;
232                         }
233                 }
234         }
235
236         spin_lock(&rt6_dflt_lock);
237         if (!match) {
238                 /*
239                  *      No default routers are known to be reachable.
240                  *      SHOULD round robin
241                  */
242                 if (rt6_dflt_pointer) {
243                         for (sprt = rt6_dflt_pointer->u.next;
244                              sprt; sprt = sprt->u.next) {
245                                 if (sprt->u.dst.obsolete <= 0 &&
246                                     sprt->u.dst.error == 0) {
247                                         match = sprt;
248                                         break;
249                                 }
250                         }
251                         for (sprt = rt;
252                              !match && sprt;
253                              sprt = sprt->u.next) {
254                                 if (sprt->u.dst.obsolete <= 0 &&
255                                     sprt->u.dst.error == 0) {
256                                         match = sprt;
257                                         break;
258                                 }
259                                 if (sprt == rt6_dflt_pointer)
260                                         break;
261                         }
262                 }
263         }
264
265         if (match) {
266                 if (rt6_dflt_pointer != match)
267                         RT6_TRACE("changed default router: %p->%p\n",
268                                   rt6_dflt_pointer, match);
269                 rt6_dflt_pointer = match;
270         }
271         spin_unlock(&rt6_dflt_lock);
272
273         if (!match) {
274                 /*
275                  * Last Resort: if no default routers found, 
276                  * use addrconf default route.
277                  * We don't record this route.
278                  */
279                 for (sprt = ip6_routing_table.leaf;
280                      sprt; sprt = sprt->u.next) {
281                         if ((sprt->rt6i_flags & RTF_DEFAULT) &&
282                             (!oif ||
283                              (sprt->rt6i_dev &&
284                               sprt->rt6i_dev->ifindex == oif))) {
285                                 match = sprt;
286                                 break;
287                         }
288                 }
289                 if (!match) {
290                         /* no default route.  give up. */
291                         match = &ip6_null_entry;
292                 }
293         }
294
295         return match;
296 }
297
298 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
299                             int oif, int strict)
300 {
301         struct fib6_node *fn;
302         struct rt6_info *rt;
303
304         read_lock_bh(&rt6_lock);
305         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
306         rt = rt6_device_match(fn->leaf, oif, strict);
307         dst_hold(&rt->u.dst);
308         rt->u.dst.__use++;
309         read_unlock_bh(&rt6_lock);
310
311         rt->u.dst.lastuse = jiffies;
312         if (rt->u.dst.error == 0)
313                 return rt;
314         dst_release(&rt->u.dst);
315         return NULL;
316 }
317
318 /* rt6_ins is called with FREE rt6_lock.
319    It takes new route entry, the addition fails by any reason the
320    route is freed. In any case, if caller does not hold it, it may
321    be destroyed.
322  */
323
324 static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
325 {
326         int err;
327
328         write_lock_bh(&rt6_lock);
329         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
330         write_unlock_bh(&rt6_lock);
331
332         return err;
333 }
334
335 /* No rt6_lock! If COW failed, the function returns dead route entry
336    with dst->error set to errno value.
337  */
338
339 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
340                                 struct in6_addr *saddr)
341 {
342         int err;
343         struct rt6_info *rt;
344
345         /*
346          *      Clone the route.
347          */
348
349         rt = ip6_rt_copy(ort);
350
351         if (rt) {
352                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
353
354                 if (!(rt->rt6i_flags&RTF_GATEWAY))
355                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
356
357                 rt->rt6i_dst.plen = 128;
358                 rt->rt6i_flags |= RTF_CACHE;
359                 rt->u.dst.flags |= DST_HOST;
360
361 #ifdef CONFIG_IPV6_SUBTREES
362                 if (rt->rt6i_src.plen && saddr) {
363                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
364                         rt->rt6i_src.plen = 128;
365                 }
366 #endif
367
368                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
369
370                 dst_hold(&rt->u.dst);
371
372                 err = rt6_ins(rt, NULL, NULL);
373                 if (err == 0)
374                         return rt;
375
376                 rt->u.dst.error = err;
377
378                 return rt;
379         }
380         dst_hold(&ip6_null_entry.u.dst);
381         return &ip6_null_entry;
382 }
383
/*
 * Walk back up the tree after a strict lookup ended on ip6_null_entry.
 *
 * Relies on names in the enclosing function: variables `rt`, `fn` and
 * `strict`, and labels `restart` and `out`.  Starting from `fn`, climbs
 * the parent links:
 *  - reaching a node flagged RTN_ROOT gives up: a reference is taken on
 *    `rt` (still ip6_null_entry) and control jumps to `out`;
 *  - reaching a node flagged RTN_RTINFO retries the match there via
 *    `restart`.
 * If the parents run out, execution simply continues after the macro.
 *
 * Wrapped in do { } while (0) so that `BACKTRACK();` is a single statement
 * and cannot capture a following `else`.
 */
#define BACKTRACK() \
do { \
        if (rt == &ip6_null_entry && strict) { \
                while ((fn = fn->parent) != NULL) { \
                        if (fn->fn_flags & RTN_ROOT) { \
                                dst_hold(&rt->u.dst); \
                                goto out; \
                        } \
                        if (fn->fn_flags & RTN_RTINFO) \
                                goto restart; \
                } \
        } \
} while (0)
395
396
397 void ip6_route_input(struct sk_buff *skb)
398 {
399         struct fib6_node *fn;
400         struct rt6_info *rt;
401         int strict;
402         int attempts = 3;
403
404         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
405
406 relookup:
407         read_lock_bh(&rt6_lock);
408
409         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
410                          &skb->nh.ipv6h->saddr);
411
412 restart:
413         rt = fn->leaf;
414
415         if ((rt->rt6i_flags & RTF_CACHE)) {
416                 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
417                 BACKTRACK();
418                 dst_hold(&rt->u.dst);
419                 goto out;
420         }
421
422         rt = rt6_device_match(rt, skb->dev->ifindex, 0);
423         BACKTRACK();
424
425         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
426                 read_unlock_bh(&rt6_lock);
427
428                 rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
429                              &skb->nh.ipv6h->saddr);
430                         
431                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
432                         goto out2;
433                 /* Race condition! In the gap, when rt6_lock was
434                    released someone could insert this route.  Relookup.
435                 */
436                 dst_release(&rt->u.dst);
437                 goto relookup;
438         }
439         dst_hold(&rt->u.dst);
440
441 out:
442         read_unlock_bh(&rt6_lock);
443 out2:
444         rt->u.dst.lastuse = jiffies;
445         rt->u.dst.__use++;
446         skb->dst = (struct dst_entry *) rt;
447 }
448
449 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
450 {
451         struct fib6_node *fn;
452         struct rt6_info *rt;
453         int strict;
454         int attempts = 3;
455
456         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
457
458 relookup:
459         read_lock_bh(&rt6_lock);
460
461         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
462
463 restart:
464         rt = fn->leaf;
465
466         if ((rt->rt6i_flags & RTF_CACHE)) {
467                 rt = rt6_device_match(rt, fl->oif, strict);
468                 BACKTRACK();
469                 dst_hold(&rt->u.dst);
470                 goto out;
471         }
472         if (rt->rt6i_flags & RTF_DEFAULT) {
473                 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
474                         rt = rt6_best_dflt(rt, fl->oif);
475         } else {
476                 rt = rt6_device_match(rt, fl->oif, strict);
477                 BACKTRACK();
478         }
479
480         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
481                 read_unlock_bh(&rt6_lock);
482
483                 rt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
484
485                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
486                         goto out2;
487
488                 /* Race condition! In the gap, when rt6_lock was
489                    released someone could insert this route.  Relookup.
490                 */
491                 dst_release(&rt->u.dst);
492                 goto relookup;
493         }
494         dst_hold(&rt->u.dst);
495
496 out:
497         read_unlock_bh(&rt6_lock);
498 out2:
499         rt->u.dst.lastuse = jiffies;
500         rt->u.dst.__use++;
501         return &rt->u.dst;
502 }
503
504
505 /*
506  *      Destination cache support functions
507  */
508
509 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
510 {
511         struct rt6_info *rt;
512
513         rt = (struct rt6_info *) dst;
514
515         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
516                 return dst;
517
518         dst_release(dst);
519         return NULL;
520 }
521
522 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
523 {
524         struct rt6_info *rt = (struct rt6_info *) dst;
525
526         if (rt) {
527                 if (rt->rt6i_flags & RTF_CACHE)
528                         ip6_del_rt(rt, NULL, NULL);
529                 else
530                         dst_release(dst);
531         }
532         return NULL;
533 }
534
535 static void ip6_link_failure(struct sk_buff *skb)
536 {
537         struct rt6_info *rt;
538
539         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
540
541         rt = (struct rt6_info *) skb->dst;
542         if (rt) {
543                 if (rt->rt6i_flags&RTF_CACHE) {
544                         dst_set_expires(&rt->u.dst, 0);
545                         rt->rt6i_flags |= RTF_EXPIRES;
546                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
547                         rt->rt6i_node->fn_sernum = -1;
548         }
549 }
550
551 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
552 {
553         struct rt6_info *rt6 = (struct rt6_info*)dst;
554
555         if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
556                 rt6->rt6i_flags |= RTF_MODIFIED;
557                 dst->metrics[RTAX_MTU-1] = mtu;
558         }
559 }
560
561 /* Protected by rt6_lock.  */
562 static struct dst_entry *ndisc_dst_gc_list;
563
564 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
565                                   struct neighbour *neigh,
566                                   struct in6_addr *addr,
567                                   int (*output)(struct sk_buff *))
568 {
569         struct rt6_info *rt = ip6_dst_alloc();
570
571         if (unlikely(rt == NULL))
572                 goto out;
573
574         if (dev)
575                 dev_hold(dev);
576         if (neigh)
577                 neigh_hold(neigh);
578         else
579                 neigh = ndisc_get_neigh(dev, addr);
580
581         rt->rt6i_dev      = dev;
582         rt->rt6i_nexthop  = neigh;
583         rt->rt6i_expires  = 0;
584         rt->rt6i_flags    = RTF_LOCAL;
585         rt->rt6i_metric   = 0;
586         atomic_set(&rt->u.dst.__refcnt, 1);
587         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
588         rt->u.dst.output  = output;
589
590         write_lock_bh(&rt6_lock);
591         rt->u.dst.next = ndisc_dst_gc_list;
592         ndisc_dst_gc_list = &rt->u.dst;
593         write_unlock_bh(&rt6_lock);
594
595         fib6_force_start_gc();
596
597 out:
598         return (struct dst_entry *)rt;
599 }
600
601 int ndisc_dst_gc(int *more)
602 {
603         struct dst_entry *dst, *next, **pprev;
604         int freed;
605
606         next = NULL;
607         pprev = &ndisc_dst_gc_list;
608         freed = 0;
609         while ((dst = *pprev) != NULL) {
610                 if (!atomic_read(&dst->__refcnt)) {
611                         *pprev = dst->next;
612                         dst_free(dst);
613                         freed++;
614                 } else {
615                         pprev = &dst->next;
616                         (*more)++;
617                 }
618         }
619
620         return freed;
621 }
622
623 static int ip6_dst_gc(void)
624 {
625         static unsigned expire = 30*HZ;
626         static unsigned long last_gc;
627         unsigned long now = jiffies;
628
629         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
630             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
631                 goto out;
632
633         expire++;
634         fib6_run_gc(expire);
635         last_gc = now;
636         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
637                 expire = ip6_rt_gc_timeout>>1;
638
639 out:
640         expire -= expire>>ip6_rt_gc_elasticity;
641         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
642 }
643
644 /* Clean host part of a prefix. Not necessary in radix tree,
645    but results in cleaner routing tables.
646
647    Remove it only when all the things will work!
648  */
649
650 static int ipv6_get_mtu(struct net_device *dev)
651 {
652         int mtu = IPV6_MIN_MTU;
653         struct inet6_dev *idev;
654
655         idev = in6_dev_get(dev);
656         if (idev) {
657                 mtu = idev->cnf.mtu6;
658                 in6_dev_put(idev);
659         }
660         return mtu;
661 }
662
663 static inline unsigned int ipv6_advmss(unsigned int mtu)
664 {
665         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
666
667         if (mtu < ip6_rt_min_advmss)
668                 mtu = ip6_rt_min_advmss;
669
670         /*
671          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
672          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
673          * IPV6_MAXPLEN is also valid and means: "any MSS, 
674          * rely only on pmtu discovery"
675          */
676         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
677                 mtu = IPV6_MAXPLEN;
678         return mtu;
679 }
680
681 static int ipv6_get_hoplimit(struct net_device *dev)
682 {
683         int hoplimit = ipv6_devconf.hop_limit;
684         struct inet6_dev *idev;
685
686         idev = in6_dev_get(dev);
687         if (idev) {
688                 hoplimit = idev->cnf.hop_limit;
689                 in6_dev_put(idev);
690         }
691         return hoplimit;
692 }
693
694 /*
695  *
696  */
697
698 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
699 {
700         int err;
701         struct rtmsg *r;
702         struct rtattr **rta;
703         struct rt6_info *rt;
704         struct net_device *dev = NULL;
705         int addr_type;
706
707         rta = (struct rtattr **) _rtattr;
708
709         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
710                 return -EINVAL;
711 #ifndef CONFIG_IPV6_SUBTREES
712         if (rtmsg->rtmsg_src_len)
713                 return -EINVAL;
714 #endif
715         if (rtmsg->rtmsg_metric == 0)
716                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
717
718         rt = ip6_dst_alloc();
719
720         if (rt == NULL)
721                 return -ENOMEM;
722
723         rt->u.dst.obsolete = -1;
724         rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
725         if (nlh && (r = NLMSG_DATA(nlh))) {
726                 rt->rt6i_protocol = r->rtm_protocol;
727         } else {
728                 rt->rt6i_protocol = RTPROT_BOOT;
729         }
730
731         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
732
733         if (addr_type & IPV6_ADDR_MULTICAST)
734                 rt->u.dst.input = ip6_mc_input;
735         else
736                 rt->u.dst.input = ip6_forward;
737
738         rt->u.dst.output = ip6_output;
739
740         if (rtmsg->rtmsg_ifindex) {
741                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
742                 err = -ENODEV;
743                 if (dev == NULL)
744                         goto out;
745         }
746
747         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
748                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
749         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
750         if (rt->rt6i_dst.plen == 128)
751                rt->u.dst.flags = DST_HOST;
752
753 #ifdef CONFIG_IPV6_SUBTREES
754         ipv6_addr_prefix(&rt->rt6i_src.addr, 
755                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
756         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
757 #endif
758
759         rt->rt6i_metric = rtmsg->rtmsg_metric;
760
761         /* We cannot add true routes via loopback here,
762            they would result in kernel looping; promote them to reject routes
763          */
764         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
765             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
766                 if (dev)
767                         dev_put(dev);
768                 dev = &loopback_dev;
769                 dev_hold(dev);
770                 rt->u.dst.output = ip6_pkt_discard;
771                 rt->u.dst.input = ip6_pkt_discard;
772                 rt->u.dst.error = -ENETUNREACH;
773                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
774                 goto install_route;
775         }
776
777         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
778                 struct in6_addr *gw_addr;
779                 int gwa_type;
780
781                 gw_addr = &rtmsg->rtmsg_gateway;
782                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
783                 gwa_type = ipv6_addr_type(gw_addr);
784
785                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
786                         struct rt6_info *grt;
787
788                         /* IPv6 strictly inhibits using not link-local
789                            addresses as nexthop address.
790                            Otherwise, router will not able to send redirects.
791                            It is very good, but in some (rare!) circumstances
792                            (SIT, PtP, NBMA NOARP links) it is handy to allow
793                            some exceptions. --ANK
794                          */
795                         err = -EINVAL;
796                         if (!(gwa_type&IPV6_ADDR_UNICAST))
797                                 goto out;
798
799                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
800
801                         err = -EHOSTUNREACH;
802                         if (grt == NULL)
803                                 goto out;
804                         if (dev) {
805                                 if (dev != grt->rt6i_dev) {
806                                         dst_release(&grt->u.dst);
807                                         goto out;
808                                 }
809                         } else {
810                                 dev = grt->rt6i_dev;
811                                 dev_hold(dev);
812                         }
813                         if (!(grt->rt6i_flags&RTF_GATEWAY))
814                                 err = 0;
815                         dst_release(&grt->u.dst);
816
817                         if (err)
818                                 goto out;
819                 }
820                 err = -EINVAL;
821                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
822                         goto out;
823         }
824
825         err = -ENODEV;
826         if (dev == NULL)
827                 goto out;
828
829         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
830                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
831                 if (IS_ERR(rt->rt6i_nexthop)) {
832                         err = PTR_ERR(rt->rt6i_nexthop);
833                         rt->rt6i_nexthop = NULL;
834                         goto out;
835                 }
836         }
837
838         rt->rt6i_flags = rtmsg->rtmsg_flags;
839
840 install_route:
841         if (rta && rta[RTA_METRICS-1]) {
842                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
843                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
844
845                 while (RTA_OK(attr, attrlen)) {
846                         unsigned flavor = attr->rta_type;
847                         if (flavor) {
848                                 if (flavor > RTAX_MAX) {
849                                         err = -EINVAL;
850                                         goto out;
851                                 }
852                                 rt->u.dst.metrics[flavor-1] =
853                                         *(u32 *)RTA_DATA(attr);
854                         }
855                         attr = RTA_NEXT(attr, attrlen);
856                 }
857         }
858
859         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
860                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
861                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
862                                 IPV6_DEFAULT_MCASTHOPS;
863                 else
864                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
865                                 ipv6_get_hoplimit(dev);
866         }
867
868         if (!rt->u.dst.metrics[RTAX_MTU-1])
869                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
870         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
871                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
872         rt->u.dst.dev = dev;
873         return rt6_ins(rt, nlh, _rtattr);
874
875 out:
876         if (dev)
877                 dev_put(dev);
878         dst_free((struct dst_entry *) rt);
879         return err;
880 }
881
882 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
883 {
884         int err;
885
886         write_lock_bh(&rt6_lock);
887
888         spin_lock_bh(&rt6_dflt_lock);
889         rt6_dflt_pointer = NULL;
890         spin_unlock_bh(&rt6_dflt_lock);
891
892         dst_release(&rt->u.dst);
893
894         err = fib6_del(rt, nlh, _rtattr);
895         write_unlock_bh(&rt6_lock);
896
897         return err;
898 }
899
900 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
901 {
902         struct fib6_node *fn;
903         struct rt6_info *rt;
904         int err = -ESRCH;
905
906         read_lock_bh(&rt6_lock);
907
908         fn = fib6_locate(&ip6_routing_table,
909                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
910                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
911         
912         if (fn) {
913                 for (rt = fn->leaf; rt; rt = rt->u.next) {
914                         if (rtmsg->rtmsg_ifindex &&
915                             (rt->rt6i_dev == NULL ||
916                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
917                                 continue;
918                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
919                             ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
920                                 continue;
921                         if (rtmsg->rtmsg_metric &&
922                             rtmsg->rtmsg_metric != rt->rt6i_metric)
923                                 continue;
924                         dst_hold(&rt->u.dst);
925                         read_unlock_bh(&rt6_lock);
926
927                         return ip6_del_rt(rt, nlh, _rtattr);
928                 }
929         }
930         read_unlock_bh(&rt6_lock);
931
932         return err;
933 }
934
935 /*
936  *      Handle redirects
937  */
/*
 *	rt6_redirect - install a host route in response to a redirect.
 *	@dest:    destination the redirect is about; becomes the /128
 *	          destination of the new route
 *	@saddr:   address that is compared with the gateway of the
 *	          current route (and, for default routes, with the
 *	          gateways of the entries on ip6_routing_table.leaf)
 *	@neigh:   neighbour whose primary_key becomes the gateway of the
 *	          new route and which is cloned as its next hop
 *	@on_link: when non-zero, RTF_GATEWAY is cleared on the new route
 *
 *	The current route to @dest is looked up on neigh->dev.  The
 *	request is dropped when the route uses another device, already
 *	points at @neigh, is not a gateway route, or when @saddr is not
 *	an acceptable gateway.  Otherwise a copy of the route is
 *	inserted as an RTF_DYNAMIC|RTF_CACHE host route, and the old
 *	route is deleted if it was itself an RTF_CACHE entry.
 *
 *	The reference obtained from rt6_lookup() is released on every
 *	path: either at the "out" label or inside ip6_del_rt().
 */
938 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
939                   struct neighbour *neigh, int on_link)
940 {
941         struct rt6_info *rt, *nrt;
942
943         /* Locate old route to this destination. */
944         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
945
946         if (rt == NULL)
947                 return;
948
949         if (neigh->dev != rt->rt6i_dev)
950                 goto out;
951
952         /* Redirect received -> path was valid.
953            Look, redirects are sent only in response to data packets,
954            so that this nexthop apparently is reachable. --ANK
955          */
956         dst_confirm(&rt->u.dst);
957
958         /* Duplicate redirect: silently ignore. */
959         if (neigh == rt->u.dst.neighbour)
960                 goto out;
961
962         /* Current route is on-link; redirect is always invalid.
963            
964            Seems, previous statement is not true. It could
965            be node, which looks for us as on-link (f.e. proxy ndisc)
966            But then router serving it might decide, that we should
967            know truth 8)8) --ANK (980726).
968          */
969         if (!(rt->rt6i_flags&RTF_GATEWAY))
970                 goto out;
971
972         /*
973          *      RFC 2461 specifies that redirects should only be
974          *      accepted if they come from the nexthop to the target.
975          *      Due to the way default routers are chosen, this notion
976          *      is a bit fuzzy and one might need to check all default
977          *      routers.
978          */
979
980         if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
981                 if (rt->rt6i_flags & RTF_DEFAULT) {
982                         struct rt6_info *rt1;
983
/* Scan the root leaf list for an entry whose gateway is saddr. */
984                         read_lock(&rt6_lock);
985                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
986                                 if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
/* Swap references: hold the matching entry, drop the looked-up one. */
987                                         dst_hold(&rt1->u.dst);
988                                         dst_release(&rt->u.dst);
989                                         read_unlock(&rt6_lock);
990                                         rt = rt1;
991                                         goto source_ok;
992                                 }
993                         }
994                         read_unlock(&rt6_lock);
995                 }
996                 if (net_ratelimit())
997                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
998                                "for redirect target\n");
999                 goto out;
1000         }
1001
1002 source_ok:
1003
1004         /*
1005          *      We have finally decided to accept it.
1006          */
1007
1008         nrt = ip6_rt_copy(rt);
1009         if (nrt == NULL)
1010                 goto out;
1011
/* The flags inherited by ip6_rt_copy() are replaced wholesale here. */
1012         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1013         if (on_link)
1014                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1015
1016         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1017         nrt->rt6i_dst.plen = 128;
1018         nrt->u.dst.flags |= DST_HOST;
1019
1020         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1021         nrt->rt6i_nexthop = neigh_clone(neigh);
1022         /* Reset pmtu, it may be better */
1023         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1024         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
1025
/* A failed insert is not reported; nrt is not touched again here. */
1026         if (rt6_ins(nrt, NULL, NULL))
1027                 goto out;
1028
/* ip6_del_rt() releases the reference on rt, hence the plain return. */
1029         if (rt->rt6i_flags&RTF_CACHE) {
1030                 ip6_del_rt(rt, NULL, NULL);
1031                 return;
1032         }
1033
1034 out:
1035         dst_release(&rt->u.dst);
1036         return;
1037 }
1038
1039 /*
1040  *      Handle ICMP "packet too big" messages
1041  *      i.e. Path MTU discovery
1042  */
1043
1044 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1045                         struct net_device *dev, u32 pmtu)
1046 {
1047         struct rt6_info *rt, *nrt;
1048
1049         if (pmtu < IPV6_MIN_MTU) {
1050                 if (net_ratelimit())
1051                         printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1052                                pmtu);
1053                 /* According to RFC1981, the PMTU is set to the IPv6 minimum
1054                    link MTU if the node receives a Packet Too Big message
1055                    reporting next-hop MTU that is less than the IPv6 minimum MTU.
1056                    */
1057                 pmtu = IPV6_MIN_MTU;
1058         }
1059
1060         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1061
1062         if (rt == NULL)
1063                 return;
1064
1065         if (pmtu >= dst_pmtu(&rt->u.dst))
1066                 goto out;
1067
1068         /* New mtu received -> path was valid.
1069            They are sent only in response to data packets,
1070            so that this nexthop apparently is reachable. --ANK
1071          */
1072         dst_confirm(&rt->u.dst);
1073
1074         /* Host route. If it is static, it would be better
1075            not to override it, but add new one, so that
1076            when cache entry will expire old pmtu
1077            would return automatically.
1078          */
1079         if (rt->rt6i_flags & RTF_CACHE) {
1080                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1081                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1082                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1083                 goto out;
1084         }
1085
1086         /* Network route.
1087            Two cases are possible:
1088            1. It is connected route. Action: COW
1089            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1090          */
1091         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1092                 nrt = rt6_cow(rt, daddr, saddr);
1093                 if (!nrt->u.dst.error) {
1094                         nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1095                         /* According to RFC 1981, detecting PMTU increase shouldn't be
1096                            happened within 5 mins, the recommended timer is 10 mins.
1097                            Here this route expiration time is set to ip6_rt_mtu_expires
1098                            which is 10 mins. After 10 mins the decreased pmtu is expired
1099                            and detecting PMTU increase will be automatically happened.
1100                          */
1101                         dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1102                         nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1103                 }
1104                 dst_release(&nrt->u.dst);
1105         } else {
1106                 nrt = ip6_rt_copy(rt);
1107                 if (nrt == NULL)
1108                         goto out;
1109                 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1110                 nrt->rt6i_dst.plen = 128;
1111                 nrt->u.dst.flags |= DST_HOST;
1112                 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1113                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1114                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1115                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1116                 rt6_ins(nrt, NULL, NULL);
1117         }
1118
1119 out:
1120         dst_release(&rt->u.dst);
1121 }
1122
1123 /*
1124  *      Misc support functions
1125  */
1126
1127 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1128 {
1129         struct rt6_info *rt = ip6_dst_alloc();
1130
1131         if (rt) {
1132                 rt->u.dst.input = ort->u.dst.input;
1133                 rt->u.dst.output = ort->u.dst.output;
1134
1135                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1136                 rt->u.dst.dev = ort->u.dst.dev;
1137                 if (rt->u.dst.dev)
1138                         dev_hold(rt->u.dst.dev);
1139                 rt->u.dst.lastuse = jiffies;
1140                 rt->rt6i_expires = 0;
1141
1142                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1143                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1144                 rt->rt6i_metric = 0;
1145
1146                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1147 #ifdef CONFIG_IPV6_SUBTREES
1148                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1149 #endif
1150         }
1151         return rt;
1152 }
1153
1154 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1155 {       
1156         struct rt6_info *rt;
1157         struct fib6_node *fn;
1158
1159         fn = &ip6_routing_table;
1160
1161         write_lock_bh(&rt6_lock);
1162         for (rt = fn->leaf; rt; rt=rt->u.next) {
1163                 if (dev == rt->rt6i_dev &&
1164                     ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1165                         break;
1166         }
1167         if (rt)
1168                 dst_hold(&rt->u.dst);
1169         write_unlock_bh(&rt6_lock);
1170         return rt;
1171 }
1172
1173 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1174                                      struct net_device *dev)
1175 {
1176         struct in6_rtmsg rtmsg;
1177
1178         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1179         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1180         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1181         rtmsg.rtmsg_metric = 1024;
1182         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1183
1184         rtmsg.rtmsg_ifindex = dev->ifindex;
1185
1186         ip6_route_add(&rtmsg, NULL, NULL);
1187         return rt6_get_dflt_router(gwaddr, dev);
1188 }
1189
1190 void rt6_purge_dflt_routers(int last_resort)
1191 {
1192         struct rt6_info *rt;
1193         u32 flags;
1194
1195         if (last_resort)
1196                 flags = RTF_ALLONLINK;
1197         else
1198                 flags = RTF_DEFAULT | RTF_ADDRCONF;     
1199
1200 restart:
1201         read_lock_bh(&rt6_lock);
1202         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1203                 if (rt->rt6i_flags & flags) {
1204                         dst_hold(&rt->u.dst);
1205
1206                         spin_lock_bh(&rt6_dflt_lock);
1207                         rt6_dflt_pointer = NULL;
1208                         spin_unlock_bh(&rt6_dflt_lock);
1209
1210                         read_unlock_bh(&rt6_lock);
1211
1212                         ip6_del_rt(rt, NULL, NULL);
1213
1214                         goto restart;
1215                 }
1216         }
1217         read_unlock_bh(&rt6_lock);
1218 }
1219
1220 int ipv6_route_ioctl(unsigned int cmd, void *arg)
1221 {
1222         struct in6_rtmsg rtmsg;
1223         int err;
1224
1225         switch(cmd) {
1226         case SIOCADDRT:         /* Add a route */
1227         case SIOCDELRT:         /* Delete a route */
1228                 if (!capable(CAP_NET_ADMIN))
1229                         return -EPERM;
1230                 err = copy_from_user(&rtmsg, arg,
1231                                      sizeof(struct in6_rtmsg));
1232                 if (err)
1233                         return -EFAULT;
1234                         
1235                 rtnl_lock();
1236                 switch (cmd) {
1237                 case SIOCADDRT:
1238                         err = ip6_route_add(&rtmsg, NULL, NULL);
1239                         break;
1240                 case SIOCDELRT:
1241                         err = ip6_route_del(&rtmsg, NULL, NULL);
1242                         break;
1243                 default:
1244                         err = -EINVAL;
1245                 }
1246                 rtnl_unlock();
1247
1248                 return err;
1249         };
1250
1251         return -EINVAL;
1252 }
1253
1254 /*
1255  *      Drop the packet on the floor
1256  */
1257
1258 int ip6_pkt_discard(struct sk_buff *skb)
1259 {
1260         IP6_INC_STATS(Ip6OutNoRoutes);
1261         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1262         kfree_skb(skb);
1263         return 0;
1264 }
1265
1266 /*
1267  *      Add address
1268  */
1269
1270 int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast)
1271 {
1272         struct rt6_info *rt = ip6_dst_alloc();
1273
1274         if (rt == NULL)
1275                 return -ENOMEM;
1276
1277         dev_hold(&loopback_dev);
1278
1279         rt->u.dst.flags = DST_HOST;
1280         rt->u.dst.input = ip6_input;
1281         rt->u.dst.output = ip6_output;
1282         rt->rt6i_dev = &loopback_dev;
1283         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1284         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
1285         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
1286         rt->u.dst.obsolete = -1;
1287
1288         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1289         if (!anycast)
1290                 rt->rt6i_flags |= RTF_LOCAL;
1291         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1292         if (rt->rt6i_nexthop == NULL) {
1293                 dst_free((struct dst_entry *) rt);
1294                 return -ENOMEM;
1295         }
1296
1297         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1298         rt->rt6i_dst.plen = 128;
1299         rt6_ins(rt, NULL, NULL);
1300
1301         return 0;
1302 }
1303
1304 /* Delete address. Warning: you should check that this address
1305    disappeared before calling this function.
1306  */
1307
1308 int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
1309 {
1310         struct rt6_info *rt;
1311         int err = -ENOENT;
1312
1313         rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
1314         if (rt) {
1315                 if (rt->rt6i_dst.plen == 128)
1316                         err = ip6_del_rt(rt, NULL, NULL);
1317                 else
1318                         dst_release(&rt->u.dst);
1319         }
1320
1321         return err;
1322 }
1323
1324 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1325 {
1326         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1327             rt != &ip6_null_entry) {
1328                 RT6_TRACE("deleted by ifdown %p\n", rt);
1329                 return -1;
1330         }
1331         return 0;
1332 }
1333
1334 void rt6_ifdown(struct net_device *dev)
1335 {
1336         write_lock_bh(&rt6_lock);
1337         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1338         write_unlock_bh(&rt6_lock);
1339 }
1340
/* Argument bundle passed by rt6_mtu_change() to rt6_mtu_change_route(). */
1341 struct rt6_mtu_change_arg
1342 {
1343         struct net_device *dev;	/* device whose MTU changed */
1344         unsigned mtu;		/* the new MTU value */
1345 };
1346
1347 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1348 {
1349         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1350         struct inet6_dev *idev;
1351
1352         /* In IPv6 pmtu discovery is not optional,
1353            so that RTAX_MTU lock cannot disable it.
1354            We still use this lock to block changes
1355            caused by addrconf/ndisc.
1356         */
1357
1358         idev = __in6_dev_get(arg->dev);
1359         if (idev == NULL)
1360                 return 0;
1361
1362         /* For administrative MTU increase, there is no way to discover
1363            IPv6 PMTU increase, so PMTU increase should be updated here.
1364            Since RFC 1981 doesn't include administrative MTU increase
1365            update PMTU increase is a MUST. (i.e. jumbo frame)
1366          */
1367         /*
1368            If new MTU is less than route PMTU, this new MTU will be the
1369            lowest MTU in the path, update the route PMTU to reflect PMTU
1370            decreases; if new MTU is greater than route PMTU, and the
1371            old MTU is the lowest MTU in the path, update the route PMTU
1372            to reflect the increase. In this case if the other nodes' MTU
1373            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1374            PMTU discouvery.
1375          */
1376         if (rt->rt6i_dev == arg->dev &&
1377             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1378             (dst_pmtu(&rt->u.dst) > arg->mtu ||
1379              (dst_pmtu(&rt->u.dst) < arg->mtu &&
1380               dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
1381                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1382         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1383         return 0;
1384 }
1385
1386 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1387 {
1388         struct rt6_mtu_change_arg arg;
1389
1390         arg.dev = dev;
1391         arg.mtu = mtu;
1392         read_lock_bh(&rt6_lock);
1393         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1394         read_unlock_bh(&rt6_lock);
1395 }
1396
1397 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1398                               struct in6_rtmsg *rtmsg)
1399 {
1400         memset(rtmsg, 0, sizeof(*rtmsg));
1401
1402         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1403         rtmsg->rtmsg_src_len = r->rtm_src_len;
1404         rtmsg->rtmsg_flags = RTF_UP;
1405         if (r->rtm_type == RTN_UNREACHABLE)
1406                 rtmsg->rtmsg_flags |= RTF_REJECT;
1407
1408         if (rta[RTA_GATEWAY-1]) {
1409                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1410                         return -EINVAL;
1411                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1412                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1413         }
1414         if (rta[RTA_DST-1]) {
1415                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1416                         return -EINVAL;
1417                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1418         }
1419         if (rta[RTA_SRC-1]) {
1420                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1421                         return -EINVAL;
1422                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1423         }
1424         if (rta[RTA_OIF-1]) {
1425                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1426                         return -EINVAL;
1427                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1428         }
1429         if (rta[RTA_PRIORITY-1]) {
1430                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1431                         return -EINVAL;
1432                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1433         }
1434         return 0;
1435 }
1436
1437 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1438 {
1439         struct rtmsg *r = NLMSG_DATA(nlh);
1440         struct in6_rtmsg rtmsg;
1441
1442         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1443                 return -EINVAL;
1444         return ip6_route_del(&rtmsg, nlh, arg);
1445 }
1446
1447 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1448 {
1449         struct rtmsg *r = NLMSG_DATA(nlh);
1450         struct in6_rtmsg rtmsg;
1451
1452         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1453                 return -EINVAL;
1454         return ip6_route_add(&rtmsg, nlh, arg);
1455 }
1456
/* Per-call context handed to rt6_dump_route() through the walker's args. */
1457 struct rt6_rtnl_dump_arg
1458 {
1459         struct sk_buff *skb;		/* message buffer being filled */
1460         struct netlink_callback *cb;	/* dump callback state of the request */
1461 };
1462
/*
 *	Append one route message describing @rt to @skb.
 *
 *	@dst, @src: when non-NULL they are reported as the destination /
 *	            source with a prefix length of 128 instead of the
 *	            route's own keys (source only with
 *	            CONFIG_IPV6_SUBTREES)
 *	@iif:       when non-zero it is reported as RTA_IIF; when zero
 *	            and @dst is given, a preferred source address is
 *	            reported if ipv6_get_saddr() succeeds
 *	@type:      netlink message type
 *	@pid, @seq: netlink header values; a zero @pid is replaced by
 *	            in_nlh->nlmsg_pid when @in_nlh is given
 *	@prefix:    when non-zero, routes without RTF_PREFIX_RT are
 *	            skipped
 *
 *	Returns 1 when the route was skipped by the @prefix filter,
 *	skb->len after the message was appended, or -1 after trimming
 *	@skb back to its previous length when the message did not go in.
 *
 *	NOTE(review): apart from the explicit goto after
 *	rtnetlink_put_metrics(), the failure labels are presumably the
 *	jump targets of the NLMSG_PUT()/RTA_PUT() macros - confirm in
 *	their definitions.
 */
1463 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1464                          struct in6_addr *dst,
1465                          struct in6_addr *src,
1466                          int iif,
1467                          int type, u32 pid, u32 seq,
1468                          struct nlmsghdr *in_nlh, int prefix)
1469 {
1470         struct rtmsg *rtm;
1471         struct nlmsghdr  *nlh;
/* Remember where this message starts, for the length fixup and for rollback. */
1472         unsigned char    *b = skb->tail;
1473         struct rta_cacheinfo ci;
1474
1475         if (prefix) {   /* user wants prefix routes only */
1476                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1477                         /* success since this is not a prefix route */
1478                         return 1;
1479                 }
1480         }
1481
1482         if (!pid && in_nlh) {
1483                 pid = in_nlh->nlmsg_pid;
1484         }
1485
1486         nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1487         rtm = NLMSG_DATA(nlh);
1488         rtm->rtm_family = AF_INET6;
1489         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1490         rtm->rtm_src_len = rt->rt6i_src.plen;
1491         rtm->rtm_tos = 0;
1492         rtm->rtm_table = RT_TABLE_MAIN;
1493         if (rt->rt6i_flags&RTF_REJECT)
1494                 rtm->rtm_type = RTN_UNREACHABLE;
1495         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1496                 rtm->rtm_type = RTN_LOCAL;
1497         else
1498                 rtm->rtm_type = RTN_UNICAST;
1499         rtm->rtm_flags = 0;
1500         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
/* Start from the stored protocol; the route flags below may override it. */
1501         rtm->rtm_protocol = rt->rt6i_protocol;
1502         if (rt->rt6i_flags&RTF_DYNAMIC)
1503                 rtm->rtm_protocol = RTPROT_REDIRECT;
1504         else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1505                 rtm->rtm_protocol = RTPROT_KERNEL;
1506         else if (rt->rt6i_flags&RTF_DEFAULT)
1507                 rtm->rtm_protocol = RTPROT_RA;
1508
1509         if (rt->rt6i_flags&RTF_CACHE)
1510                 rtm->rtm_flags |= RTM_F_CLONED;
1511
1512         if (dst) {
1513                 RTA_PUT(skb, RTA_DST, 16, dst);
1514                 rtm->rtm_dst_len = 128;
1515         } else if (rtm->rtm_dst_len)
1516                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1517 #ifdef CONFIG_IPV6_SUBTREES
1518         if (src) {
1519                 RTA_PUT(skb, RTA_SRC, 16, src);
1520                 rtm->rtm_src_len = 128;
1521         } else if (rtm->rtm_src_len)
1522                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1523 #endif
1524         if (iif)
1525                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1526         else if (dst) {
1527                 struct in6_addr saddr_buf;
1528                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1529                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1530         }
1531         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1532                 goto rtattr_failure;
1533         if (rt->u.dst.neighbour)
1534                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1535         if (rt->u.dst.dev)
1536                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1537         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
/* Cache info: idle time and remaining lifetime are converted with jiffies_to_clock_t(). */
1538         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1539         if (rt->rt6i_expires)
1540                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1541         else
1542                 ci.rta_expires = 0;
1543         ci.rta_used = rt->u.dst.__use;
1544         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1545         ci.rta_error = rt->u.dst.error;
1546         ci.rta_id = 0;
1547         ci.rta_ts = 0;
1548         ci.rta_tsage = 0;
1549         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
/* Final message length covers the header plus every attribute added above. */
1550         nlh->nlmsg_len = skb->tail - b;
1551         return skb->len;
1552
1553 nlmsg_failure:
1554 rtattr_failure:
/* Roll the buffer back to where this message started. */
1555         skb_trim(skb, b - skb->data);
1556         return -1;
1557 }
1558
1559 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1560 {
1561         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1562         int prefix;
1563
1564         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1565                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1566                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1567         } else
1568                 prefix = 0;
1569
1570         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1571                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1572                      NULL, prefix);
1573 }
1574
1575 static int fib6_dump_node(struct fib6_walker_t *w)
1576 {
1577         int res;
1578         struct rt6_info *rt;
1579
1580         for (rt = w->leaf; rt; rt = rt->u.next) {
1581                 res = rt6_dump_route(rt, w->args);
1582                 if (res < 0) {
1583                         /* Frame is full, suspend walking */
1584                         w->leaf = rt;
1585                         return 1;
1586                 }
1587                 BUG_TRAP(res!=0);
1588         }
1589         w->leaf = NULL;
1590         return 0;
1591 }
1592
1593 static void fib6_dump_end(struct netlink_callback *cb)
1594 {
1595         struct fib6_walker_t *w = (void*)cb->args[0];
1596
1597         if (w) {
1598                 cb->args[0] = 0;
1599                 fib6_walker_unlink(w);
1600                 kfree(w);
1601         }
1602         if (cb->args[1]) {
1603                 cb->done = (void*)cb->args[1];
1604                 cb->args[1] = 0;
1605         }
1606 }
1607
1608 static int fib6_dump_done(struct netlink_callback *cb)
1609 {
1610         fib6_dump_end(cb);
1611         return cb->done(cb);
1612 }
1613
/*
 *	Netlink dump callback for the IPv6 routing table.
 *
 *	State kept in @cb between calls:
 *	  args[0] - the tree walker, allocated on the first call
 *	  args[1] - the "done" hook that was installed before this dump
 *	            replaced it with fib6_dump_done()
 *
 *	The first call starts a walk with fib6_walk(); later calls
 *	resume it with fib6_walk_continue().  Routes are written to @skb
 *	by fib6_dump_node().
 *
 *	Returns a negative walker result unchanged, -ENOMEM when the
 *	walker cannot be allocated, and skb->len otherwise.  When the
 *	value returned is <= 0 the walker has been destroyed and the
 *	original "done" hook restored.
 */
1614 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1615 {
1616         struct rt6_rtnl_dump_arg arg;
1617         struct fib6_walker_t *w;
1618         int res;
1619
1620         arg.skb = skb;
1621         arg.cb = cb;
1622
1623         w = (void*)cb->args[0];
1624         if (w == NULL) {
1625                 /* New dump:
1626                  * 
1627                  * 1. hook callback destructor.
1628                  */
1629                 cb->args[1] = (long)cb->done;
1630                 cb->done = fib6_dump_done;
1631
1632                 /*
1633                  * 2. allocate and initialize walker.
1634                  */
1635                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1636                 if (w == NULL)
1637                         return -ENOMEM;
1638                 RT6_TRACE("dump<%p", w);
1639                 memset(w, 0, sizeof(*w));
1640                 w->root = &ip6_routing_table;
1641                 w->func = fib6_dump_node;
1642                 w->args = &arg;
1643                 cb->args[0] = (long)w;
1644                 read_lock_bh(&rt6_lock);
1645                 res = fib6_walk(w);
1646                 read_unlock_bh(&rt6_lock);
1647         } else {
/* arg lives on this call's stack, so the walker must be re-pointed on every resume. */
1648                 w->args = &arg;
1649                 read_lock_bh(&rt6_lock);
1650                 res = fib6_walk_continue(w);
1651                 read_unlock_bh(&rt6_lock);
1652         }
1653 #if RT6_DEBUG >= 3
1654         if (res <= 0 && skb->len == 0)
1655                 RT6_TRACE("%p>dump end\n", w);
1656 #endif
/* Any non-negative walker result is replaced by the amount of data in skb. */
1657         res = res < 0 ? res : skb->len;
1658         /* res < 0 is an error. (really, impossible)
1659            res == 0 means that dump is complete, but skb still can contain data.
1660            res > 0 dump is not complete, but frame is full.
1661          */
1662         /* Destroy walker, if dump of this table is complete. */
1663         if (res <= 0)
1664                 fib6_dump_end(cb);
1665         return res;
1666 }
1667
1668 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1669 {
1670         struct rtattr **rta = arg;
1671         int iif = 0;
1672         int err = -ENOBUFS;
1673         struct sk_buff *skb;
1674         struct flowi fl;
1675         struct rt6_info *rt;
1676
1677         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1678         if (skb == NULL)
1679                 goto out;
1680
1681         /* Reserve room for dummy headers, this skb can pass
1682            through good chunk of routing engine.
1683          */
1684         skb->mac.raw = skb->data;
1685         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1686
1687         memset(&fl, 0, sizeof(fl));
1688         if (rta[RTA_SRC-1])
1689                 ipv6_addr_copy(&fl.fl6_src,
1690                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1691         if (rta[RTA_DST-1])
1692                 ipv6_addr_copy(&fl.fl6_dst,
1693                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1694
1695         if (rta[RTA_IIF-1])
1696                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1697
1698         if (iif) {
1699                 struct net_device *dev;
1700                 dev = __dev_get_by_index(iif);
1701                 if (!dev) {
1702                         err = -ENODEV;
1703                         goto out_free;
1704                 }
1705         }
1706
1707         fl.oif = 0;
1708         if (rta[RTA_OIF-1])
1709                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1710
1711         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1712
1713         skb->dst = &rt->u.dst;
1714
1715         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1716         err = rt6_fill_node(skb, rt, 
1717                             &fl.fl6_dst, &fl.fl6_src,
1718                             iif,
1719                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1720                             nlh->nlmsg_seq, nlh, 0);
1721         if (err < 0) {
1722                 err = -EMSGSIZE;
1723                 goto out_free;
1724         }
1725
1726         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1727         if (err > 0)
1728                 err = 0;
1729 out:
1730         return err;
1731 out_free:
1732         kfree_skb(skb);
1733         goto out;       
1734 }
1735
1736 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1737 {
1738         struct sk_buff *skb;
1739         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1740
1741         skb = alloc_skb(size, gfp_any());
1742         if (!skb) {
1743                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1744                 return;
1745         }
1746         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1747                 kfree_skb(skb);
1748                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1749                 return;
1750         }
1751         NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1752         netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1753 }
1754
1755 /*
1756  *      /proc
1757  */
1758
1759 #ifdef CONFIG_PROC_FS
1760
1761 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
1762
/* Cursor state used by rt6_info_route() while formatting routes as text. */
1763 struct rt6_proc_arg
1764 {
1765         char *buffer;	/* output buffer */
1766         int offset;	/* read offset; offset / RT6_INFO_LEN records are skipped */
1767         int length;	/* no further record is started once len reaches this */
1768         int skip;	/* records skipped so far */
1769         int len;	/* bytes written to buffer so far */
1770 };
1771
1772 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1773 {
1774         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1775         int i;
1776
1777         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1778                 arg->skip++;
1779                 return 0;
1780         }
1781
1782         if (arg->len >= arg->length)
1783                 return 0;
1784
1785         for (i=0; i<16; i++) {
1786                 sprintf(arg->buffer + arg->len, "%02x",
1787                         rt->rt6i_dst.addr.s6_addr[i]);
1788                 arg->len += 2;
1789         }
1790         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1791                             rt->rt6i_dst.plen);
1792
1793 #ifdef CONFIG_IPV6_SUBTREES
1794         for (i=0; i<16; i++) {
1795                 sprintf(arg->buffer + arg->len, "%02x",
1796                         rt->rt6i_src.addr.s6_addr[i]);
1797                 arg->len += 2;
1798         }
1799         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1800                             rt->rt6i_src.plen);
1801 #else
1802         sprintf(arg->buffer + arg->len,
1803                 "00000000000000000000000000000000 00 ");
1804         arg->len += 36;
1805 #endif
1806
1807         if (rt->rt6i_nexthop) {
1808                 for (i=0; i<16; i++) {
1809                         sprintf(arg->buffer + arg->len, "%02x",
1810                                 rt->rt6i_nexthop->primary_key[i]);
1811                         arg->len += 2;
1812                 }
1813         } else {
1814                 sprintf(arg->buffer + arg->len,
1815                         "00000000000000000000000000000000");
1816                 arg->len += 32;
1817         }
1818         arg->len += sprintf(arg->buffer + arg->len,
1819                             " %08x %08x %08x %08x %8s\n",
1820                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1821                             rt->u.dst.__use, rt->rt6i_flags, 
1822                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1823         return 0;
1824 }
1825
1826 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1827 {
1828         struct rt6_proc_arg arg;
1829         arg.buffer = buffer;
1830         arg.offset = offset;
1831         arg.length = length;
1832         arg.skip = 0;
1833         arg.len = 0;
1834
1835         read_lock_bh(&rt6_lock);
1836         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1837         read_unlock_bh(&rt6_lock);
1838
1839         *start = buffer;
1840         if (offset)
1841                 *start += offset % RT6_INFO_LEN;
1842
1843         arg.len -= offset % RT6_INFO_LEN;
1844
1845         if (arg.len > length)
1846                 arg.len = length;
1847         if (arg.len < 0)
1848                 arg.len = 0;
1849
1850         return arg.len;
1851 }
1852
1853 extern struct rt6_statistics rt6_stats;
1854
1855 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1856 {
1857         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1858                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1859                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1860                       rt6_stats.fib_rt_cache,
1861                       atomic_read(&ip6_dst_ops.entries),
1862                       rt6_stats.fib_discarded_routes);
1863
1864         return 0;
1865 }
1866
/*
 *      open() handler for the rt6_stats file: binds rt6_stats_seq_show()
 *      to the file through single_open(), passing no private data, and
 *      returns whatever single_open() returns.
 */
1867 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1868 {
1869         return single_open(file, rt6_stats_seq_show, NULL);
1870 }
1871
/*
 *      File operations for the "rt6_stats" proc entry (registered in
 *      ip6_route_init()).  Only open is local; read, llseek and release
 *      use the seq_file helpers.
 */
1872 static struct file_operations rt6_stats_seq_fops = {
1873         .owner   = THIS_MODULE,
1874         .open    = rt6_stats_seq_open,
1875         .read    = seq_read,
1876         .llseek  = seq_lseek,
1877         .release = single_release,
1878 };
1879 #endif  /* CONFIG_PROC_FS */
1880
1881 #ifdef CONFIG_SYSCTL
1882
1883 static int flush_delay;
1884
1885 static
1886 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1887                               void *buffer, size_t *lenp)
1888 {
1889         if (write) {
1890                 proc_dointvec(ctl, write, filp, buffer, lenp);
1891                 if (flush_delay < 0)
1892                         flush_delay = 0;
1893                 fib6_run_gc((unsigned long)flush_delay);
1894                 return 0;
1895         } else
1896                 return -EINVAL;
1897 }
1898
1899 ctl_table ipv6_route_table[] = {
1900         {
1901                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
1902                 .procname       =       "flush",
1903                 .data           =       &flush_delay,
1904                 .maxlen         =       sizeof(int),
1905                 .mode           =       0644,
1906                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
1907         },
1908         {
1909                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
1910                 .procname       =       "gc_thresh",
1911                 .data           =       &ip6_dst_ops.gc_thresh,
1912                 .maxlen         =       sizeof(int),
1913                 .mode           =       0644,
1914                 .proc_handler   =       &proc_dointvec,
1915         },
1916         {
1917                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
1918                 .procname       =       "max_size",
1919                 .data           =       &ip6_rt_max_size,
1920                 .maxlen         =       sizeof(int),
1921                 .mode           =       0644,
1922                 .proc_handler   =       &proc_dointvec,
1923         },
1924         {
1925                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
1926                 .procname       =       "gc_min_interval",
1927                 .data           =       &ip6_rt_gc_min_interval,
1928                 .maxlen         =       sizeof(int),
1929                 .mode           =       0644,
1930                 .proc_handler   =       &proc_dointvec_jiffies,
1931                 .strategy       =       &sysctl_jiffies,
1932         },
1933         {
1934                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
1935                 .procname       =       "gc_timeout",
1936                 .data           =       &ip6_rt_gc_timeout,
1937                 .maxlen         =       sizeof(int),
1938                 .mode           =       0644,
1939                 .proc_handler   =       &proc_dointvec_jiffies,
1940                 .strategy       =       &sysctl_jiffies,
1941         },
1942         {
1943                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
1944                 .procname       =       "gc_interval",
1945                 .data           =       &ip6_rt_gc_interval,
1946                 .maxlen         =       sizeof(int),
1947                 .mode           =       0644,
1948                 .proc_handler   =       &proc_dointvec_jiffies,
1949                 .strategy       =       &sysctl_jiffies,
1950         },
1951         {
1952                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
1953                 .procname       =       "gc_elasticity",
1954                 .data           =       &ip6_rt_gc_elasticity,
1955                 .maxlen         =       sizeof(int),
1956                 .mode           =       0644,
1957                 .proc_handler   =       &proc_dointvec_jiffies,
1958                 .strategy       =       &sysctl_jiffies,
1959         },
1960         {
1961                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
1962                 .procname       =       "mtu_expires",
1963                 .data           =       &ip6_rt_mtu_expires,
1964                 .maxlen         =       sizeof(int),
1965                 .mode           =       0644,
1966                 .proc_handler   =       &proc_dointvec_jiffies,
1967                 .strategy       =       &sysctl_jiffies,
1968         },
1969         {
1970                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
1971                 .procname       =       "min_adv_mss",
1972                 .data           =       &ip6_rt_min_advmss,
1973                 .maxlen         =       sizeof(int),
1974                 .mode           =       0644,
1975                 .proc_handler   =       &proc_dointvec_jiffies,
1976                 .strategy       =       &sysctl_jiffies,
1977         },
1978         { .ctl_name = 0 }
1979 };
1980
1981 #endif
1982
1983 void __init ip6_route_init(void)
1984 {
1985         struct proc_dir_entry *p;
1986
1987         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
1988                                                      sizeof(struct rt6_info),
1989                                                      0, SLAB_HWCACHE_ALIGN,
1990                                                      NULL, NULL);
1991         if (!ip6_dst_ops.kmem_cachep)
1992                 panic("cannot create ip6_dst_cache");
1993
1994         fib6_init();
1995 #ifdef  CONFIG_PROC_FS
1996         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
1997         if (p)
1998                 p->owner = THIS_MODULE;
1999
2000         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2001 #endif
2002 #ifdef CONFIG_XFRM
2003         xfrm6_init();
2004 #endif
2005 }
2006
/*
 *      Module-exit teardown, undoing ip6_route_init(): removes the proc
 *      entries, shuts down the XFRM hooks, and finally destroys the
 *      "ip6_dst_cache" slab cache.
 */
2007 void __exit ip6_route_cleanup(void)
2008 {
2009 #ifdef CONFIG_PROC_FS
        /* Same names as registered in ip6_route_init(). */
2010         proc_net_remove("ipv6_route");
2011         proc_net_remove("rt6_stats");
2012 #endif
2013 #ifdef CONFIG_XFRM
2014         xfrm6_fini();
2015 #endif
        /* NOTE(review): presumably a NULL device means "all devices" - confirm. */
2016         rt6_ifdown(NULL);
2017         fib6_gc_cleanup();
        /* Destroy the cache last, after the route and GC cleanup calls above. */
2018         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2019 }