patch-2_6_7-vs1_9_1_12
[linux-2.6.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
40
41 #ifdef  CONFIG_PROC_FS
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #endif
45
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56
57 #include <asm/uaccess.h>
58
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62
/*
 * Debug verbosity for this file.  Set to 3 (or more) to get tracing: at that
 * level RDBG() and RT6_TRACE() expand to printk() calls, below it they
 * expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
73
74
75 static int ip6_rt_max_size = 4096;
76 static int ip6_rt_gc_min_interval = HZ / 2;
77 static int ip6_rt_gc_timeout = 60*HZ;
78 int ip6_rt_gc_interval = 30*HZ;
79 static int ip6_rt_gc_elasticity = 9;
80 static int ip6_rt_mtu_expires = 10*60*HZ;
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void             ip6_dst_destroy(struct dst_entry *);
87 static void             ip6_dst_ifdown(struct dst_entry *, int how);
88 static int               ip6_dst_gc(void);
89
90 static int              ip6_pkt_discard(struct sk_buff *skb);
91 static int              ip6_pkt_discard_out(struct sk_buff **pskb);
92 static void             ip6_link_failure(struct sk_buff *skb);
93 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
94
95 static struct dst_ops ip6_dst_ops = {
96         .family                 =       AF_INET6,
97         .protocol               =       __constant_htons(ETH_P_IPV6),
98         .gc                     =       ip6_dst_gc,
99         .gc_thresh              =       1024,
100         .check                  =       ip6_dst_check,
101         .destroy                =       ip6_dst_destroy,
102         .ifdown                 =       ip6_dst_ifdown,
103         .negative_advice        =       ip6_negative_advice,
104         .link_failure           =       ip6_link_failure,
105         .update_pmtu            =       ip6_rt_update_pmtu,
106         .entry_size             =       sizeof(struct rt6_info),
107 };
108
109 struct rt6_info ip6_null_entry = {
110         .u = {
111                 .dst = {
112                         .__refcnt       = ATOMIC_INIT(1),
113                         .__use          = 1,
114                         .dev            = &loopback_dev,
115                         .obsolete       = -1,
116                         .error          = -ENETUNREACH,
117                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
118                         .input          = ip6_pkt_discard,
119                         .output         = ip6_pkt_discard_out,
120                         .ops            = &ip6_dst_ops,
121                         .path           = (struct dst_entry*)&ip6_null_entry,
122                 }
123         },
124         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
125         .rt6i_metric    = ~(u32) 0,
126         .rt6i_ref       = ATOMIC_INIT(1),
127 };
128
129 struct fib6_node ip6_routing_table = {
130         .leaf           = &ip6_null_entry,
131         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
132 };
133
134 /* Protects all the ip6 fib */
135
136 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
137
138
139 /* allocate dst with ip6_dst_ops */
140 static __inline__ struct rt6_info *ip6_dst_alloc(void)
141 {
142         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
143 }
144
145 static void ip6_dst_destroy(struct dst_entry *dst)
146 {
147         struct rt6_info *rt = (struct rt6_info *)dst;
148         struct inet6_dev *idev = rt->rt6i_idev;
149
150         if (idev != NULL) {
151                 rt->rt6i_idev = NULL;
152                 in6_dev_put(idev);
153         }       
154 }
155
/*
 * dst_ops.ifdown callback.  Does exactly what ip6_dst_destroy() does, i.e.
 * releases the route's inet6_dev reference; @how is not consulted.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, int how)
{
        ip6_dst_destroy(dst);
}
160
161 /*
162  *      Route lookup. Any rt6_lock is implied.
163  */
164
165 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
166                                                     int oif,
167                                                     int strict)
168 {
169         struct rt6_info *local = NULL;
170         struct rt6_info *sprt;
171
172         if (oif) {
173                 for (sprt = rt; sprt; sprt = sprt->u.next) {
174                         struct net_device *dev = sprt->rt6i_dev;
175                         if (dev->ifindex == oif)
176                                 return sprt;
177                         if (dev->flags&IFF_LOOPBACK)
178                                 local = sprt;
179                 }
180
181                 if (local)
182                         return local;
183
184                 if (strict)
185                         return &ip6_null_entry;
186         }
187         return rt;
188 }
189
190 /*
191  *      pointer to the last default router chosen. BH is disabled locally.
192  */
193 static struct rt6_info *rt6_dflt_pointer;
194 static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
195
196 /* Default Router Selection (RFC 2461 6.3.6) */
197 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
198 {
199         struct rt6_info *match = NULL;
200         struct rt6_info *sprt;
201         int mpri = 0;
202
203         for (sprt = rt; sprt; sprt = sprt->u.next) {
204                 struct neighbour *neigh;
205                 int m = 0;
206
207                 if (!oif ||
208                     (sprt->rt6i_dev &&
209                      sprt->rt6i_dev->ifindex == oif))
210                         m += 8;
211
212                 if (sprt == rt6_dflt_pointer)
213                         m += 4;
214
215                 if ((neigh = sprt->rt6i_nexthop) != NULL) {
216                         read_lock_bh(&neigh->lock);
217                         switch (neigh->nud_state) {
218                         case NUD_REACHABLE:
219                                 m += 3;
220                                 break;
221
222                         case NUD_STALE:
223                         case NUD_DELAY:
224                         case NUD_PROBE:
225                                 m += 2;
226                                 break;
227
228                         case NUD_NOARP:
229                         case NUD_PERMANENT:
230                                 m += 1;
231                                 break;
232
233                         case NUD_INCOMPLETE:
234                         default:
235                                 read_unlock_bh(&neigh->lock);
236                                 continue;
237                         }
238                         read_unlock_bh(&neigh->lock);
239                 } else {
240                         continue;
241                 }
242
243                 if (m > mpri || m >= 12) {
244                         match = sprt;
245                         mpri = m;
246                         if (m >= 12) {
247                                 /* we choose the last default router if it
248                                  * is in (probably) reachable state.
249                                  * If route changed, we should do pmtu
250                                  * discovery. --yoshfuji
251                                  */
252                                 break;
253                         }
254                 }
255         }
256
257         spin_lock(&rt6_dflt_lock);
258         if (!match) {
259                 /*
260                  *      No default routers are known to be reachable.
261                  *      SHOULD round robin
262                  */
263                 if (rt6_dflt_pointer) {
264                         for (sprt = rt6_dflt_pointer->u.next;
265                              sprt; sprt = sprt->u.next) {
266                                 if (sprt->u.dst.obsolete <= 0 &&
267                                     sprt->u.dst.error == 0) {
268                                         match = sprt;
269                                         break;
270                                 }
271                         }
272                         for (sprt = rt;
273                              !match && sprt;
274                              sprt = sprt->u.next) {
275                                 if (sprt->u.dst.obsolete <= 0 &&
276                                     sprt->u.dst.error == 0) {
277                                         match = sprt;
278                                         break;
279                                 }
280                                 if (sprt == rt6_dflt_pointer)
281                                         break;
282                         }
283                 }
284         }
285
286         if (match) {
287                 if (rt6_dflt_pointer != match)
288                         RT6_TRACE("changed default router: %p->%p\n",
289                                   rt6_dflt_pointer, match);
290                 rt6_dflt_pointer = match;
291         }
292         spin_unlock(&rt6_dflt_lock);
293
294         if (!match) {
295                 /*
296                  * Last Resort: if no default routers found, 
297                  * use addrconf default route.
298                  * We don't record this route.
299                  */
300                 for (sprt = ip6_routing_table.leaf;
301                      sprt; sprt = sprt->u.next) {
302                         if ((sprt->rt6i_flags & RTF_DEFAULT) &&
303                             (!oif ||
304                              (sprt->rt6i_dev &&
305                               sprt->rt6i_dev->ifindex == oif))) {
306                                 match = sprt;
307                                 break;
308                         }
309                 }
310                 if (!match) {
311                         /* no default route.  give up. */
312                         match = &ip6_null_entry;
313                 }
314         }
315
316         return match;
317 }
318
319 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
320                             int oif, int strict)
321 {
322         struct fib6_node *fn;
323         struct rt6_info *rt;
324
325         read_lock_bh(&rt6_lock);
326         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
327         rt = rt6_device_match(fn->leaf, oif, strict);
328         dst_hold(&rt->u.dst);
329         rt->u.dst.__use++;
330         read_unlock_bh(&rt6_lock);
331
332         rt->u.dst.lastuse = jiffies;
333         if (rt->u.dst.error == 0)
334                 return rt;
335         dst_release(&rt->u.dst);
336         return NULL;
337 }
338
/* rt6_ins is called with rt6_lock NOT held (it takes the write lock itself).
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference to
   it, the entry may be destroyed.
 */
344
345 static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
346 {
347         int err;
348
349         write_lock_bh(&rt6_lock);
350         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
351         write_unlock_bh(&rt6_lock);
352
353         return err;
354 }
355
/* Must be called without rt6_lock held! If the COW fails, the function
   returns a dead route entry with dst->error set to the errno value.
 */
359
360 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
361                                 struct in6_addr *saddr)
362 {
363         int err;
364         struct rt6_info *rt;
365
366         /*
367          *      Clone the route.
368          */
369
370         rt = ip6_rt_copy(ort);
371
372         if (rt) {
373                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
374
375                 if (!(rt->rt6i_flags&RTF_GATEWAY))
376                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
377
378                 rt->rt6i_dst.plen = 128;
379                 rt->rt6i_flags |= RTF_CACHE;
380                 rt->u.dst.flags |= DST_HOST;
381
382 #ifdef CONFIG_IPV6_SUBTREES
383                 if (rt->rt6i_src.plen && saddr) {
384                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
385                         rt->rt6i_src.plen = 128;
386                 }
387 #endif
388
389                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
390
391                 dst_hold(&rt->u.dst);
392
393                 err = rt6_ins(rt, NULL, NULL);
394                 if (err == 0)
395                         return rt;
396
397                 rt->u.dst.error = err;
398
399                 return rt;
400         }
401         dst_hold(&ip6_null_entry.u.dst);
402         return &ip6_null_entry;
403 }
404
/*
 * BACKTRACK() - climb towards the FIB root after a strict lookup missed.
 *
 * Expands in place and relies on the enclosing function's locals `rt`,
 * `fn` and `strict` and on its labels `restart` and `out`.
 *
 * Only acts when rt is &ip6_null_entry and strict is non-zero.  It then
 * follows fn->parent: a node flagged RTN_ROOT takes a reference on rt and
 * jumps to `out`; a node flagged RTN_RTINFO jumps to `restart`.  The ROOT
 * test comes first, so a node carrying both flags ends the walk.
 */
#define BACKTRACK() \
do { \
        if (rt == &ip6_null_entry && strict) { \
                while ((fn = fn->parent) != NULL) { \
                        if (fn->fn_flags & RTN_ROOT) { \
                                dst_hold(&rt->u.dst); \
                                goto out; \
                        } \
                        if (fn->fn_flags & RTN_RTINFO) \
                                goto restart; \
                } \
        } \
} while (0)
416
417
418 void ip6_route_input(struct sk_buff *skb)
419 {
420         struct fib6_node *fn;
421         struct rt6_info *rt;
422         int strict;
423         int attempts = 3;
424
425         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
426
427 relookup:
428         read_lock_bh(&rt6_lock);
429
430         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
431                          &skb->nh.ipv6h->saddr);
432
433 restart:
434         rt = fn->leaf;
435
436         if ((rt->rt6i_flags & RTF_CACHE)) {
437                 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
438                 BACKTRACK();
439                 dst_hold(&rt->u.dst);
440                 goto out;
441         }
442
443         rt = rt6_device_match(rt, skb->dev->ifindex, 0);
444         BACKTRACK();
445
446         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
447                 read_unlock_bh(&rt6_lock);
448
449                 rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
450                              &skb->nh.ipv6h->saddr);
451                         
452                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
453                         goto out2;
454                 /* Race condition! In the gap, when rt6_lock was
455                    released someone could insert this route.  Relookup.
456                 */
457                 dst_release(&rt->u.dst);
458                 goto relookup;
459         }
460         dst_hold(&rt->u.dst);
461
462 out:
463         read_unlock_bh(&rt6_lock);
464 out2:
465         rt->u.dst.lastuse = jiffies;
466         rt->u.dst.__use++;
467         skb->dst = (struct dst_entry *) rt;
468 }
469
470 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
471 {
472         struct fib6_node *fn;
473         struct rt6_info *rt;
474         int strict;
475         int attempts = 3;
476
477         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
478
479 relookup:
480         read_lock_bh(&rt6_lock);
481
482         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
483
484 restart:
485         rt = fn->leaf;
486
487         if ((rt->rt6i_flags & RTF_CACHE)) {
488                 rt = rt6_device_match(rt, fl->oif, strict);
489                 BACKTRACK();
490                 dst_hold(&rt->u.dst);
491                 goto out;
492         }
493         if (rt->rt6i_flags & RTF_DEFAULT) {
494                 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
495                         rt = rt6_best_dflt(rt, fl->oif);
496         } else {
497                 rt = rt6_device_match(rt, fl->oif, strict);
498                 BACKTRACK();
499         }
500
501         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
502                 read_unlock_bh(&rt6_lock);
503
504                 rt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
505
506                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
507                         goto out2;
508
509                 /* Race condition! In the gap, when rt6_lock was
510                    released someone could insert this route.  Relookup.
511                 */
512                 dst_release(&rt->u.dst);
513                 goto relookup;
514         }
515         dst_hold(&rt->u.dst);
516
517 out:
518         read_unlock_bh(&rt6_lock);
519 out2:
520         rt->u.dst.lastuse = jiffies;
521         rt->u.dst.__use++;
522         return &rt->u.dst;
523 }
524
525
526 /*
527  *      Destination cache support functions
528  */
529
530 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
531 {
532         struct rt6_info *rt;
533
534         rt = (struct rt6_info *) dst;
535
536         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
537                 return dst;
538
539         dst_release(dst);
540         return NULL;
541 }
542
543 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
544 {
545         struct rt6_info *rt = (struct rt6_info *) dst;
546
547         if (rt) {
548                 if (rt->rt6i_flags & RTF_CACHE)
549                         ip6_del_rt(rt, NULL, NULL);
550                 else
551                         dst_release(dst);
552         }
553         return NULL;
554 }
555
556 static void ip6_link_failure(struct sk_buff *skb)
557 {
558         struct rt6_info *rt;
559
560         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
561
562         rt = (struct rt6_info *) skb->dst;
563         if (rt) {
564                 if (rt->rt6i_flags&RTF_CACHE) {
565                         dst_set_expires(&rt->u.dst, 0);
566                         rt->rt6i_flags |= RTF_EXPIRES;
567                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
568                         rt->rt6i_node->fn_sernum = -1;
569         }
570 }
571
572 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
573 {
574         struct rt6_info *rt6 = (struct rt6_info*)dst;
575
576         if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
577                 rt6->rt6i_flags |= RTF_MODIFIED;
578                 if (mtu < IPV6_MIN_MTU)
579                         mtu = IPV6_MIN_MTU;
580                 dst->metrics[RTAX_MTU-1] = mtu;
581         }
582 }
583
/*
 * Singly linked list (through dst->next) of the entries created by
 * ndisc_dst_alloc(); ndisc_dst_gc() frees the unreferenced ones.
 * Protected by rt6_lock.
 */
static struct dst_entry *ndisc_dst_gc_list;

/* Helpers defined further down that ndisc_dst_alloc() needs. */
static int ipv6_get_mtu(struct net_device *dev);
static inline unsigned int ipv6_advmss(unsigned int mtu);
588
589 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
590                                   struct neighbour *neigh,
591                                   struct in6_addr *addr,
592                                   int (*output)(struct sk_buff **))
593 {
594         struct rt6_info *rt = ip6_dst_alloc();
595
596         if (unlikely(rt == NULL))
597                 goto out;
598
599         dev_hold(dev);
600         if (neigh)
601                 neigh_hold(neigh);
602         else
603                 neigh = ndisc_get_neigh(dev, addr);
604
605         rt->rt6i_dev      = dev;
606         rt->rt6i_idev     = in6_dev_get(dev);
607         rt->rt6i_nexthop  = neigh;
608         rt->rt6i_expires  = 0;
609         rt->rt6i_flags    = RTF_LOCAL;
610         rt->rt6i_metric   = 0;
611         atomic_set(&rt->u.dst.__refcnt, 1);
612         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
613         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
614         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
615         rt->u.dst.output  = output;
616
617         write_lock_bh(&rt6_lock);
618         rt->u.dst.next = ndisc_dst_gc_list;
619         ndisc_dst_gc_list = &rt->u.dst;
620         write_unlock_bh(&rt6_lock);
621
622         fib6_force_start_gc();
623
624 out:
625         return (struct dst_entry *)rt;
626 }
627
628 int ndisc_dst_gc(int *more)
629 {
630         struct dst_entry *dst, *next, **pprev;
631         int freed;
632
633         next = NULL;
634         pprev = &ndisc_dst_gc_list;
635         freed = 0;
636         while ((dst = *pprev) != NULL) {
637                 if (!atomic_read(&dst->__refcnt)) {
638                         *pprev = dst->next;
639                         dst_free(dst);
640                         freed++;
641                 } else {
642                         pprev = &dst->next;
643                         (*more)++;
644                 }
645         }
646
647         return freed;
648 }
649
650 static int ip6_dst_gc(void)
651 {
652         static unsigned expire = 30*HZ;
653         static unsigned long last_gc;
654         unsigned long now = jiffies;
655
656         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
657             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
658                 goto out;
659
660         expire++;
661         fib6_run_gc(expire);
662         last_gc = now;
663         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
664                 expire = ip6_rt_gc_timeout>>1;
665
666 out:
667         expire -= expire>>ip6_rt_gc_elasticity;
668         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
669 }
670
671 /* Clean host part of a prefix. Not necessary in radix tree,
672    but results in cleaner routing tables.
673
674    Remove it only when all the things will work!
675  */
676
677 static int ipv6_get_mtu(struct net_device *dev)
678 {
679         int mtu = IPV6_MIN_MTU;
680         struct inet6_dev *idev;
681
682         idev = in6_dev_get(dev);
683         if (idev) {
684                 mtu = idev->cnf.mtu6;
685                 in6_dev_put(idev);
686         }
687         return mtu;
688 }
689
690 static inline unsigned int ipv6_advmss(unsigned int mtu)
691 {
692         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
693
694         if (mtu < ip6_rt_min_advmss)
695                 mtu = ip6_rt_min_advmss;
696
697         /*
698          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
699          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
700          * IPV6_MAXPLEN is also valid and means: "any MSS, 
701          * rely only on pmtu discovery"
702          */
703         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
704                 mtu = IPV6_MAXPLEN;
705         return mtu;
706 }
707
708 static int ipv6_get_hoplimit(struct net_device *dev)
709 {
710         int hoplimit = ipv6_devconf.hop_limit;
711         struct inet6_dev *idev;
712
713         idev = in6_dev_get(dev);
714         if (idev) {
715                 hoplimit = idev->cnf.hop_limit;
716                 in6_dev_put(idev);
717         }
718         return hoplimit;
719 }
720
721 /*
722  *
723  */
724
725 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
726 {
727         int err;
728         struct rtmsg *r;
729         struct rtattr **rta;
730         struct rt6_info *rt;
731         struct net_device *dev = NULL;
732         int addr_type;
733
734         rta = (struct rtattr **) _rtattr;
735
736         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
737                 return -EINVAL;
738 #ifndef CONFIG_IPV6_SUBTREES
739         if (rtmsg->rtmsg_src_len)
740                 return -EINVAL;
741 #endif
742         if (rtmsg->rtmsg_ifindex) {
743                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
744                 if (!dev)
745                         return -ENODEV;
746         }
747
748         if (rtmsg->rtmsg_metric == 0)
749                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
750
751         rt = ip6_dst_alloc();
752
753         if (rt == NULL)
754                 return -ENOMEM;
755
756         rt->u.dst.obsolete = -1;
757         rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
758         if (nlh && (r = NLMSG_DATA(nlh))) {
759                 rt->rt6i_protocol = r->rtm_protocol;
760         } else {
761                 rt->rt6i_protocol = RTPROT_BOOT;
762         }
763
764         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
765
766         if (addr_type & IPV6_ADDR_MULTICAST)
767                 rt->u.dst.input = ip6_mc_input;
768         else
769                 rt->u.dst.input = ip6_forward;
770
771         rt->u.dst.output = ip6_output;
772
773         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
774                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
775         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
776         if (rt->rt6i_dst.plen == 128)
777                rt->u.dst.flags = DST_HOST;
778
779 #ifdef CONFIG_IPV6_SUBTREES
780         ipv6_addr_prefix(&rt->rt6i_src.addr, 
781                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
782         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
783 #endif
784
785         rt->rt6i_metric = rtmsg->rtmsg_metric;
786
787         /* We cannot add true routes via loopback here,
788            they would result in kernel looping; promote them to reject routes
789          */
790         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
791             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
792                 if (dev)
793                         dev_put(dev);
794                 dev = &loopback_dev;
795                 dev_hold(dev);
796                 rt->u.dst.output = ip6_pkt_discard_out;
797                 rt->u.dst.input = ip6_pkt_discard;
798                 rt->u.dst.error = -ENETUNREACH;
799                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
800                 goto install_route;
801         }
802
803         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
804                 struct in6_addr *gw_addr;
805                 int gwa_type;
806
807                 gw_addr = &rtmsg->rtmsg_gateway;
808                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
809                 gwa_type = ipv6_addr_type(gw_addr);
810
811                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
812                         struct rt6_info *grt;
813
814                         /* IPv6 strictly inhibits using not link-local
815                            addresses as nexthop address.
816                            Otherwise, router will not able to send redirects.
817                            It is very good, but in some (rare!) circumstances
818                            (SIT, PtP, NBMA NOARP links) it is handy to allow
819                            some exceptions. --ANK
820                          */
821                         err = -EINVAL;
822                         if (!(gwa_type&IPV6_ADDR_UNICAST))
823                                 goto out;
824
825                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
826
827                         err = -EHOSTUNREACH;
828                         if (grt == NULL)
829                                 goto out;
830                         if (dev) {
831                                 if (dev != grt->rt6i_dev) {
832                                         dst_release(&grt->u.dst);
833                                         goto out;
834                                 }
835                         } else {
836                                 dev = grt->rt6i_dev;
837                                 dev_hold(dev);
838                         }
839                         if (!(grt->rt6i_flags&RTF_GATEWAY))
840                                 err = 0;
841                         dst_release(&grt->u.dst);
842
843                         if (err)
844                                 goto out;
845                 }
846                 err = -EINVAL;
847                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
848                         goto out;
849         }
850
851         err = -ENODEV;
852         if (dev == NULL)
853                 goto out;
854
855         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
856                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
857                 if (IS_ERR(rt->rt6i_nexthop)) {
858                         err = PTR_ERR(rt->rt6i_nexthop);
859                         rt->rt6i_nexthop = NULL;
860                         goto out;
861                 }
862         }
863
864         rt->rt6i_flags = rtmsg->rtmsg_flags;
865
866 install_route:
867         if (rta && rta[RTA_METRICS-1]) {
868                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
869                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
870
871                 while (RTA_OK(attr, attrlen)) {
872                         unsigned flavor = attr->rta_type;
873                         if (flavor) {
874                                 if (flavor > RTAX_MAX) {
875                                         err = -EINVAL;
876                                         goto out;
877                                 }
878                                 rt->u.dst.metrics[flavor-1] =
879                                         *(u32 *)RTA_DATA(attr);
880                         }
881                         attr = RTA_NEXT(attr, attrlen);
882                 }
883         }
884
885         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
886                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
887                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
888                                 IPV6_DEFAULT_MCASTHOPS;
889                 else
890                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
891                                 ipv6_get_hoplimit(dev);
892         }
893
894         if (!rt->u.dst.metrics[RTAX_MTU-1])
895                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
896         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
897                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
898         rt->u.dst.dev = dev;
899         rt->rt6i_idev = in6_dev_get(dev);
900         return rt6_ins(rt, nlh, _rtattr);
901
902 out:
903         if (dev)
904                 dev_put(dev);
905         dst_free((struct dst_entry *) rt);
906         return err;
907 }
908
909 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
910 {
911         int err;
912
913         write_lock_bh(&rt6_lock);
914
915         spin_lock_bh(&rt6_dflt_lock);
916         rt6_dflt_pointer = NULL;
917         spin_unlock_bh(&rt6_dflt_lock);
918
919         dst_release(&rt->u.dst);
920
921         err = fib6_del(rt, nlh, _rtattr);
922         write_unlock_bh(&rt6_lock);
923
924         return err;
925 }
926
927 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
928 {
929         struct fib6_node *fn;
930         struct rt6_info *rt;
931         int err = -ESRCH;
932
933         read_lock_bh(&rt6_lock);
934
935         fn = fib6_locate(&ip6_routing_table,
936                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
937                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
938         
939         if (fn) {
940                 for (rt = fn->leaf; rt; rt = rt->u.next) {
941                         if (rtmsg->rtmsg_ifindex &&
942                             (rt->rt6i_dev == NULL ||
943                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
944                                 continue;
945                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
946                             ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
947                                 continue;
948                         if (rtmsg->rtmsg_metric &&
949                             rtmsg->rtmsg_metric != rt->rt6i_metric)
950                                 continue;
951                         dst_hold(&rt->u.dst);
952                         read_unlock_bh(&rt6_lock);
953
954                         return ip6_del_rt(rt, nlh, _rtattr);
955                 }
956         }
957         read_unlock_bh(&rt6_lock);
958
959         return err;
960 }
961
962 /*
963  *      Handle redirects
964  */
965 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
966                   struct neighbour *neigh, int on_link)
967 {
968         struct rt6_info *rt, *nrt;
969
970         /* Locate old route to this destination. */
971         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
972
973         if (rt == NULL)
974                 return;
975
976         if (neigh->dev != rt->rt6i_dev)
977                 goto out;
978
979         /* Redirect received -> path was valid.
980            Look, redirects are sent only in response to data packets,
981            so that this nexthop apparently is reachable. --ANK
982          */
983         dst_confirm(&rt->u.dst);
984
985         /* Duplicate redirect: silently ignore. */
986         if (neigh == rt->u.dst.neighbour)
987                 goto out;
988
989         /* Current route is on-link; redirect is always invalid.
990            
991            Seems, previous statement is not true. It could
992            be node, which looks for us as on-link (f.e. proxy ndisc)
993            But then router serving it might decide, that we should
994            know truth 8)8) --ANK (980726).
995          */
996         if (!(rt->rt6i_flags&RTF_GATEWAY))
997                 goto out;
998
999         /*
1000          *      RFC 2461 specifies that redirects should only be
1001          *      accepted if they come from the nexthop to the target.
1002          *      Due to the way default routers are chosen, this notion
1003          *      is a bit fuzzy and one might need to check all default
1004          *      routers.
1005          */
1006
1007         if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
1008                 if (rt->rt6i_flags & RTF_DEFAULT) {
1009                         struct rt6_info *rt1;
1010
1011                         read_lock(&rt6_lock);
1012                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1013                                 if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
1014                                         dst_hold(&rt1->u.dst);
1015                                         dst_release(&rt->u.dst);
1016                                         read_unlock(&rt6_lock);
1017                                         rt = rt1;
1018                                         goto source_ok;
1019                                 }
1020                         }
1021                         read_unlock(&rt6_lock);
1022                 }
1023                 if (net_ratelimit())
1024                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1025                                "for redirect target\n");
1026                 goto out;
1027         }
1028
1029 source_ok:
1030
1031         /*
1032          *      We have finally decided to accept it.
1033          */
1034
1035         nrt = ip6_rt_copy(rt);
1036         if (nrt == NULL)
1037                 goto out;
1038
1039         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1040         if (on_link)
1041                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1042
1043         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1044         nrt->rt6i_dst.plen = 128;
1045         nrt->u.dst.flags |= DST_HOST;
1046
1047         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1048         nrt->rt6i_nexthop = neigh_clone(neigh);
1049         /* Reset pmtu, it may be better */
1050         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1051         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
1052
1053         if (rt6_ins(nrt, NULL, NULL))
1054                 goto out;
1055
1056         if (rt->rt6i_flags&RTF_CACHE) {
1057                 ip6_del_rt(rt, NULL, NULL);
1058                 return;
1059         }
1060
1061 out:
1062         dst_release(&rt->u.dst);
1063         return;
1064 }
1065
1066 /*
1067  *      Handle ICMP "packet too big" messages
1068  *      i.e. Path MTU discovery
1069  */
1070
1071 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1072                         struct net_device *dev, u32 pmtu)
1073 {
1074         struct rt6_info *rt, *nrt;
1075
1076         if (pmtu < IPV6_MIN_MTU) {
1077                 if (net_ratelimit())
1078                         printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1079                                pmtu);
1080                 /* According to RFC1981, the PMTU is set to the IPv6 minimum
1081                    link MTU if the node receives a Packet Too Big message
1082                    reporting next-hop MTU that is less than the IPv6 minimum MTU.
1083                    */
1084                 pmtu = IPV6_MIN_MTU;
1085         }
1086
1087         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1088
1089         if (rt == NULL)
1090                 return;
1091
1092         if (pmtu >= dst_pmtu(&rt->u.dst))
1093                 goto out;
1094
1095         /* New mtu received -> path was valid.
1096            They are sent only in response to data packets,
1097            so that this nexthop apparently is reachable. --ANK
1098          */
1099         dst_confirm(&rt->u.dst);
1100
1101         /* Host route. If it is static, it would be better
1102            not to override it, but add new one, so that
1103            when cache entry will expire old pmtu
1104            would return automatically.
1105          */
1106         if (rt->rt6i_flags & RTF_CACHE) {
1107                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1108                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1109                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1110                 goto out;
1111         }
1112
1113         /* Network route.
1114            Two cases are possible:
1115            1. It is connected route. Action: COW
1116            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1117          */
1118         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1119                 nrt = rt6_cow(rt, daddr, saddr);
1120                 if (!nrt->u.dst.error) {
1121                         nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1122                         /* According to RFC 1981, detecting PMTU increase shouldn't be
1123                            happened within 5 mins, the recommended timer is 10 mins.
1124                            Here this route expiration time is set to ip6_rt_mtu_expires
1125                            which is 10 mins. After 10 mins the decreased pmtu is expired
1126                            and detecting PMTU increase will be automatically happened.
1127                          */
1128                         dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1129                         nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1130                 }
1131                 dst_release(&nrt->u.dst);
1132         } else {
1133                 nrt = ip6_rt_copy(rt);
1134                 if (nrt == NULL)
1135                         goto out;
1136                 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1137                 nrt->rt6i_dst.plen = 128;
1138                 nrt->u.dst.flags |= DST_HOST;
1139                 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1140                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1141                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1142                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1143                 rt6_ins(nrt, NULL, NULL);
1144         }
1145
1146 out:
1147         dst_release(&rt->u.dst);
1148 }
1149
1150 /*
1151  *      Misc support functions
1152  */
1153
1154 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1155 {
1156         struct rt6_info *rt = ip6_dst_alloc();
1157
1158         if (rt) {
1159                 rt->u.dst.input = ort->u.dst.input;
1160                 rt->u.dst.output = ort->u.dst.output;
1161
1162                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1163                 rt->u.dst.dev = ort->u.dst.dev;
1164                 if (rt->u.dst.dev)
1165                         dev_hold(rt->u.dst.dev);
1166                 rt->rt6i_idev = ort->rt6i_idev;
1167                 if (rt->rt6i_idev)
1168                         in6_dev_hold(rt->rt6i_idev);
1169                 rt->u.dst.lastuse = jiffies;
1170                 rt->rt6i_expires = 0;
1171
1172                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1173                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1174                 rt->rt6i_metric = 0;
1175
1176                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1177 #ifdef CONFIG_IPV6_SUBTREES
1178                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1179 #endif
1180         }
1181         return rt;
1182 }
1183
1184 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1185 {       
1186         struct rt6_info *rt;
1187         struct fib6_node *fn;
1188
1189         fn = &ip6_routing_table;
1190
1191         write_lock_bh(&rt6_lock);
1192         for (rt = fn->leaf; rt; rt=rt->u.next) {
1193                 if (dev == rt->rt6i_dev &&
1194                     ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1195                         break;
1196         }
1197         if (rt)
1198                 dst_hold(&rt->u.dst);
1199         write_unlock_bh(&rt6_lock);
1200         return rt;
1201 }
1202
1203 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1204                                      struct net_device *dev)
1205 {
1206         struct in6_rtmsg rtmsg;
1207
1208         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1209         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1210         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1211         rtmsg.rtmsg_metric = 1024;
1212         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1213
1214         rtmsg.rtmsg_ifindex = dev->ifindex;
1215
1216         ip6_route_add(&rtmsg, NULL, NULL);
1217         return rt6_get_dflt_router(gwaddr, dev);
1218 }
1219
1220 void rt6_purge_dflt_routers(int last_resort)
1221 {
1222         struct rt6_info *rt;
1223         u32 flags;
1224
1225         if (last_resort)
1226                 flags = RTF_ALLONLINK;
1227         else
1228                 flags = RTF_DEFAULT | RTF_ADDRCONF;     
1229
1230 restart:
1231         read_lock_bh(&rt6_lock);
1232         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1233                 if (rt->rt6i_flags & flags) {
1234                         dst_hold(&rt->u.dst);
1235
1236                         spin_lock_bh(&rt6_dflt_lock);
1237                         rt6_dflt_pointer = NULL;
1238                         spin_unlock_bh(&rt6_dflt_lock);
1239
1240                         read_unlock_bh(&rt6_lock);
1241
1242                         ip6_del_rt(rt, NULL, NULL);
1243
1244                         goto restart;
1245                 }
1246         }
1247         read_unlock_bh(&rt6_lock);
1248 }
1249
1250 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1251 {
1252         struct in6_rtmsg rtmsg;
1253         int err;
1254
1255         switch(cmd) {
1256         case SIOCADDRT:         /* Add a route */
1257         case SIOCDELRT:         /* Delete a route */
1258                 if (!capable(CAP_NET_ADMIN))
1259                         return -EPERM;
1260                 err = copy_from_user(&rtmsg, arg,
1261                                      sizeof(struct in6_rtmsg));
1262                 if (err)
1263                         return -EFAULT;
1264                         
1265                 rtnl_lock();
1266                 switch (cmd) {
1267                 case SIOCADDRT:
1268                         err = ip6_route_add(&rtmsg, NULL, NULL);
1269                         break;
1270                 case SIOCDELRT:
1271                         err = ip6_route_del(&rtmsg, NULL, NULL);
1272                         break;
1273                 default:
1274                         err = -EINVAL;
1275                 }
1276                 rtnl_unlock();
1277
1278                 return err;
1279         };
1280
1281         return -EINVAL;
1282 }
1283
1284 /*
1285  *      Drop the packet on the floor
1286  */
1287
1288 int ip6_pkt_discard(struct sk_buff *skb)
1289 {
1290         IP6_INC_STATS(OutNoRoutes);
1291         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1292         kfree_skb(skb);
1293         return 0;
1294 }
1295
/*
 *	Same as ip6_pkt_discard(), for callers that hold a pointer to
 *	the skb pointer.
 */
int ip6_pkt_discard_out(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;

	return ip6_pkt_discard(skb);
}
1300
1301 /*
1302  *      Add address
1303  */
1304
1305 int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast)
1306 {
1307         struct rt6_info *rt = ip6_dst_alloc();
1308
1309         if (rt == NULL)
1310                 return -ENOMEM;
1311
1312         dev_hold(&loopback_dev);
1313
1314         rt->u.dst.flags = DST_HOST;
1315         rt->u.dst.input = ip6_input;
1316         rt->u.dst.output = ip6_output;
1317         rt->rt6i_dev = &loopback_dev;
1318         rt->rt6i_idev = in6_dev_get(&loopback_dev);
1319         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1320         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
1321         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
1322         rt->u.dst.obsolete = -1;
1323
1324         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1325         if (!anycast)
1326                 rt->rt6i_flags |= RTF_LOCAL;
1327         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1328         if (rt->rt6i_nexthop == NULL) {
1329                 dst_free((struct dst_entry *) rt);
1330                 return -ENOMEM;
1331         }
1332
1333         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1334         rt->rt6i_dst.plen = 128;
1335         rt6_ins(rt, NULL, NULL);
1336
1337         return 0;
1338 }
1339
1340 /* Delete address. Warning: you should check that this address
1341    disappeared before calling this function.
1342  */
1343
1344 int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
1345 {
1346         struct rt6_info *rt;
1347         int err = -ENOENT;
1348
1349         rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
1350         if (rt) {
1351                 if (rt->rt6i_dst.plen == 128)
1352                         err = ip6_del_rt(rt, NULL, NULL);
1353                 else
1354                         dst_release(&rt->u.dst);
1355         }
1356
1357         return err;
1358 }
1359
1360 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1361 {
1362         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1363             rt != &ip6_null_entry) {
1364                 RT6_TRACE("deleted by ifdown %p\n", rt);
1365                 return -1;
1366         }
1367         return 0;
1368 }
1369
1370 void rt6_ifdown(struct net_device *dev)
1371 {
1372         write_lock_bh(&rt6_lock);
1373         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1374         write_unlock_bh(&rt6_lock);
1375 }
1376
/* Argument block handed by rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* its new MTU */
};
1382
1383 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1384 {
1385         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1386         struct inet6_dev *idev;
1387
1388         /* In IPv6 pmtu discovery is not optional,
1389            so that RTAX_MTU lock cannot disable it.
1390            We still use this lock to block changes
1391            caused by addrconf/ndisc.
1392         */
1393
1394         idev = __in6_dev_get(arg->dev);
1395         if (idev == NULL)
1396                 return 0;
1397
1398         /* For administrative MTU increase, there is no way to discover
1399            IPv6 PMTU increase, so PMTU increase should be updated here.
1400            Since RFC 1981 doesn't include administrative MTU increase
1401            update PMTU increase is a MUST. (i.e. jumbo frame)
1402          */
1403         /*
1404            If new MTU is less than route PMTU, this new MTU will be the
1405            lowest MTU in the path, update the route PMTU to reflect PMTU
1406            decreases; if new MTU is greater than route PMTU, and the
1407            old MTU is the lowest MTU in the path, update the route PMTU
1408            to reflect the increase. In this case if the other nodes' MTU
1409            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1410            PMTU discouvery.
1411          */
1412         if (rt->rt6i_dev == arg->dev &&
1413             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1414             (dst_pmtu(&rt->u.dst) > arg->mtu ||
1415              (dst_pmtu(&rt->u.dst) < arg->mtu &&
1416               dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
1417                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1418         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1419         return 0;
1420 }
1421
1422 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1423 {
1424         struct rt6_mtu_change_arg arg;
1425
1426         arg.dev = dev;
1427         arg.mtu = mtu;
1428         read_lock_bh(&rt6_lock);
1429         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1430         read_unlock_bh(&rt6_lock);
1431 }
1432
1433 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1434                               struct in6_rtmsg *rtmsg)
1435 {
1436         memset(rtmsg, 0, sizeof(*rtmsg));
1437
1438         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1439         rtmsg->rtmsg_src_len = r->rtm_src_len;
1440         rtmsg->rtmsg_flags = RTF_UP;
1441         if (r->rtm_type == RTN_UNREACHABLE)
1442                 rtmsg->rtmsg_flags |= RTF_REJECT;
1443
1444         if (rta[RTA_GATEWAY-1]) {
1445                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1446                         return -EINVAL;
1447                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1448                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1449         }
1450         if (rta[RTA_DST-1]) {
1451                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1452                         return -EINVAL;
1453                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1454         }
1455         if (rta[RTA_SRC-1]) {
1456                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1457                         return -EINVAL;
1458                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1459         }
1460         if (rta[RTA_OIF-1]) {
1461                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1462                         return -EINVAL;
1463                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1464         }
1465         if (rta[RTA_PRIORITY-1]) {
1466                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1467                         return -EINVAL;
1468                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1469         }
1470         return 0;
1471 }
1472
1473 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1474 {
1475         struct rtmsg *r = NLMSG_DATA(nlh);
1476         struct in6_rtmsg rtmsg;
1477
1478         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1479                 return -EINVAL;
1480         return ip6_route_del(&rtmsg, nlh, arg);
1481 }
1482
1483 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1484 {
1485         struct rtmsg *r = NLMSG_DATA(nlh);
1486         struct in6_rtmsg rtmsg;
1487
1488         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1489                 return -EINVAL;
1490         return ip6_route_add(&rtmsg, nlh, arg);
1491 }
1492
/* Per-call context of a route dump, reached through the walker's args. */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer the routes are written to */
	struct netlink_callback *cb;	/* netlink dump callback state */
};
1498
/*
 *	Append one rtnetlink route message describing @rt to @skb.
 *
 *	@dst, @src: when non-NULL they are emitted as the RTA_DST/RTA_SRC
 *		attribute and the corresponding length is reported as 128;
 *		otherwise the route's own key is emitted if its prefix
 *		length is non-zero (RTA_SRC only with CONFIG_IPV6_SUBTREES).
 *	@iif:	when non-zero it is emitted as RTA_IIF; otherwise, if @dst
 *		is given, a source address is looked up with
 *		ipv6_get_saddr() and emitted as RTA_PREFSRC on success.
 *	@type, @pid, @seq: netlink header fields; a zero @pid is replaced
 *		by in_nlh->nlmsg_pid when @in_nlh is given.
 *	@prefix: when non-zero, routes without RTF_PREFIX_RT are skipped.
 *
 *	Returns 1 when the route was skipped by the @prefix filter,
 *	skb->len when the message was appended, and -1 after trimming
 *	@skb back to its length on entry when the message did not fit.
 *
 *	NOTE(review): the nlmsg_failure/rtattr_failure labels are
 *	presumably the jump targets used inside the NLMSG_PUT() and
 *	RTA_PUT() macros when the skb has no room left; those macros are
 *	defined in the netlink headers, not in this file - confirm there.
 */
1499 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1500                          struct in6_addr *dst,
1501                          struct in6_addr *src,
1502                          int iif,
1503                          int type, u32 pid, u32 seq,
1504                          struct nlmsghdr *in_nlh, int prefix)
1505 {
1506         struct rtmsg *rtm;
1507         struct nlmsghdr  *nlh;
	/* Tail position on entry: start of this message, used both for
	   the final length and for rolling back on failure. */
1508         unsigned char    *b = skb->tail;
1509         struct rta_cacheinfo ci;
1510
1511         if (prefix) {   /* user wants prefix routes only */
1512                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1513                         /* success since this is not a prefix route */
1514                         return 1;
1515                 }
1516         }
1517
	/* No explicit pid: answer the sender of the request, if any. */
1518         if (!pid && in_nlh) {
1519                 pid = in_nlh->nlmsg_pid;
1520         }
1521
1522         nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1523         rtm = NLMSG_DATA(nlh);
1524         rtm->rtm_family = AF_INET6;
1525         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1526         rtm->rtm_src_len = rt->rt6i_src.plen;
1527         rtm->rtm_tos = 0;
1528         rtm->rtm_table = RT_TABLE_MAIN;
	/* Route type: reject routes first, then loopback-device routes. */
1529         if (rt->rt6i_flags&RTF_REJECT)
1530                 rtm->rtm_type = RTN_UNREACHABLE;
1531         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1532                 rtm->rtm_type = RTN_LOCAL;
1533         else
1534                 rtm->rtm_type = RTN_UNICAST;
1535         rtm->rtm_flags = 0;
1536         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	/* The stored protocol is overridden by what the flags imply. */
1537         rtm->rtm_protocol = rt->rt6i_protocol;
1538         if (rt->rt6i_flags&RTF_DYNAMIC)
1539                 rtm->rtm_protocol = RTPROT_REDIRECT;
1540         else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1541                 rtm->rtm_protocol = RTPROT_KERNEL;
1542         else if (rt->rt6i_flags&RTF_DEFAULT)
1543                 rtm->rtm_protocol = RTPROT_RA;
1544
1545         if (rt->rt6i_flags&RTF_CACHE)
1546                 rtm->rtm_flags |= RTM_F_CLONED;
1547
	/* An explicit destination/source is reported as a host (/128). */
1548         if (dst) {
1549                 RTA_PUT(skb, RTA_DST, 16, dst);
1550                 rtm->rtm_dst_len = 128;
1551         } else if (rtm->rtm_dst_len)
1552                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1553 #ifdef CONFIG_IPV6_SUBTREES
1554         if (src) {
1555                 RTA_PUT(skb, RTA_SRC, 16, src);
1556                 rtm->rtm_src_len = 128;
1557         } else if (rtm->rtm_src_len)
1558                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1559 #endif
1560         if (iif)
1561                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1562         else if (dst) {
1563                 struct in6_addr saddr_buf;
1564                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1565                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1566         }
1567         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1568                 goto rtattr_failure;
	/* The gateway reported is the neighbour's key, when one is bound. */
1569         if (rt->u.dst.neighbour)
1570                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1571         if (rt->u.dst.dev)
1572                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1573         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
	/* Cache statistics; times are converted with jiffies_to_clock_t(). */
1574         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1575         if (rt->rt6i_expires)
1576                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1577         else
1578                 ci.rta_expires = 0;
1579         ci.rta_used = rt->u.dst.__use;
1580         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1581         ci.rta_error = rt->u.dst.error;
1582         ci.rta_id = 0;
1583         ci.rta_ts = 0;
1584         ci.rta_tsage = 0;
1585         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
	/* Message length = everything appended since entry. */
1586         nlh->nlmsg_len = skb->tail - b;
1587         return skb->len;
1588
	/* Failure: discard whatever was appended for this route. */
1589 nlmsg_failure:
1590 rtattr_failure:
1591         skb_trim(skb, b - skb->data);
1592         return -1;
1593 }
1594
1595 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1596 {
1597         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1598         int prefix;
1599
1600         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1601                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1602                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1603         } else
1604                 prefix = 0;
1605
1606         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1607                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1608                      NULL, prefix);
1609 }
1610
1611 static int fib6_dump_node(struct fib6_walker_t *w)
1612 {
1613         int res;
1614         struct rt6_info *rt;
1615
1616         for (rt = w->leaf; rt; rt = rt->u.next) {
1617                 res = rt6_dump_route(rt, w->args);
1618                 if (res < 0) {
1619                         /* Frame is full, suspend walking */
1620                         w->leaf = rt;
1621                         return 1;
1622                 }
1623                 BUG_TRAP(res!=0);
1624         }
1625         w->leaf = NULL;
1626         return 0;
1627 }
1628
1629 static void fib6_dump_end(struct netlink_callback *cb)
1630 {
1631         struct fib6_walker_t *w = (void*)cb->args[0];
1632
1633         if (w) {
1634                 cb->args[0] = 0;
1635                 fib6_walker_unlink(w);
1636                 kfree(w);
1637         }
1638         if (cb->args[1]) {
1639                 cb->done = (void*)cb->args[1];
1640                 cb->args[1] = 0;
1641         }
1642 }
1643
1644 static int fib6_dump_done(struct netlink_callback *cb)
1645 {
1646         fib6_dump_end(cb);
1647         return cb->done(cb);
1648 }
1649
1650 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1651 {
1652         struct rt6_rtnl_dump_arg arg;
1653         struct fib6_walker_t *w;
1654         int res;
1655
1656         arg.skb = skb;
1657         arg.cb = cb;
1658
1659         w = (void*)cb->args[0];
1660         if (w == NULL) {
1661                 /* New dump:
1662                  * 
1663                  * 1. hook callback destructor.
1664                  */
1665                 cb->args[1] = (long)cb->done;
1666                 cb->done = fib6_dump_done;
1667
1668                 /*
1669                  * 2. allocate and initialize walker.
1670                  */
1671                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1672                 if (w == NULL)
1673                         return -ENOMEM;
1674                 RT6_TRACE("dump<%p", w);
1675                 memset(w, 0, sizeof(*w));
1676                 w->root = &ip6_routing_table;
1677                 w->func = fib6_dump_node;
1678                 w->args = &arg;
1679                 cb->args[0] = (long)w;
1680                 read_lock_bh(&rt6_lock);
1681                 res = fib6_walk(w);
1682                 read_unlock_bh(&rt6_lock);
1683         } else {
1684                 w->args = &arg;
1685                 read_lock_bh(&rt6_lock);
1686                 res = fib6_walk_continue(w);
1687                 read_unlock_bh(&rt6_lock);
1688         }
1689 #if RT6_DEBUG >= 3
1690         if (res <= 0 && skb->len == 0)
1691                 RT6_TRACE("%p>dump end\n", w);
1692 #endif
1693         res = res < 0 ? res : skb->len;
1694         /* res < 0 is an error. (really, impossible)
1695            res == 0 means that dump is complete, but skb still can contain data.
1696            res > 0 dump is not complete, but frame is full.
1697          */
1698         /* Destroy walker, if dump of this table is complete. */
1699         if (res <= 0)
1700                 fib6_dump_end(cb);
1701         return res;
1702 }
1703
1704 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1705 {
1706         struct rtattr **rta = arg;
1707         int iif = 0;
1708         int err = -ENOBUFS;
1709         struct sk_buff *skb;
1710         struct flowi fl;
1711         struct rt6_info *rt;
1712
1713         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1714         if (skb == NULL)
1715                 goto out;
1716
1717         /* Reserve room for dummy headers, this skb can pass
1718            through good chunk of routing engine.
1719          */
1720         skb->mac.raw = skb->data;
1721         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1722
1723         memset(&fl, 0, sizeof(fl));
1724         if (rta[RTA_SRC-1])
1725                 ipv6_addr_copy(&fl.fl6_src,
1726                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1727         if (rta[RTA_DST-1])
1728                 ipv6_addr_copy(&fl.fl6_dst,
1729                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1730
1731         if (rta[RTA_IIF-1])
1732                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1733
1734         if (iif) {
1735                 struct net_device *dev;
1736                 dev = __dev_get_by_index(iif);
1737                 if (!dev) {
1738                         err = -ENODEV;
1739                         goto out_free;
1740                 }
1741         }
1742
1743         fl.oif = 0;
1744         if (rta[RTA_OIF-1])
1745                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1746
1747         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1748
1749         skb->dst = &rt->u.dst;
1750
1751         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1752         err = rt6_fill_node(skb, rt, 
1753                             &fl.fl6_dst, &fl.fl6_src,
1754                             iif,
1755                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1756                             nlh->nlmsg_seq, nlh, 0);
1757         if (err < 0) {
1758                 err = -EMSGSIZE;
1759                 goto out_free;
1760         }
1761
1762         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1763         if (err > 0)
1764                 err = 0;
1765 out:
1766         return err;
1767 out_free:
1768         kfree_skb(skb);
1769         goto out;       
1770 }
1771
1772 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1773 {
1774         struct sk_buff *skb;
1775         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1776
1777         skb = alloc_skb(size, gfp_any());
1778         if (!skb) {
1779                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1780                 return;
1781         }
1782         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1783                 kfree_skb(skb);
1784                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1785                 return;
1786         }
1787         NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1788         netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1789 }
1790
1791 /*
1792  *      /proc
1793  */
1794
1795 #ifdef CONFIG_PROC_FS
1796
/*
 * Nominal length in bytes of one route record as formatted by
 * rt6_info_route().  The read offset is divided by this value to find
 * how many records to skip, and taken modulo this value to find the
 * position inside the first record returned.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/*
 * State shared between rt6_proc_info() and the per-route callback
 * rt6_info_route() while one read request is being served.
 */
struct rt6_proc_arg {
	char *buffer;	/* destination for the formatted records */
	int offset;	/* byte offset requested by the reader */
	int length;	/* number of bytes the reader asked for */
	int skip;	/* records skipped so far to reach offset */
	int len;	/* bytes written into buffer so far */
};
1807
1808 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1809 {
1810         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1811         int i;
1812
1813         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1814                 arg->skip++;
1815                 return 0;
1816         }
1817
1818         if (arg->len >= arg->length)
1819                 return 0;
1820
1821         for (i=0; i<16; i++) {
1822                 sprintf(arg->buffer + arg->len, "%02x",
1823                         rt->rt6i_dst.addr.s6_addr[i]);
1824                 arg->len += 2;
1825         }
1826         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1827                             rt->rt6i_dst.plen);
1828
1829 #ifdef CONFIG_IPV6_SUBTREES
1830         for (i=0; i<16; i++) {
1831                 sprintf(arg->buffer + arg->len, "%02x",
1832                         rt->rt6i_src.addr.s6_addr[i]);
1833                 arg->len += 2;
1834         }
1835         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1836                             rt->rt6i_src.plen);
1837 #else
1838         sprintf(arg->buffer + arg->len,
1839                 "00000000000000000000000000000000 00 ");
1840         arg->len += 36;
1841 #endif
1842
1843         if (rt->rt6i_nexthop) {
1844                 for (i=0; i<16; i++) {
1845                         sprintf(arg->buffer + arg->len, "%02x",
1846                                 rt->rt6i_nexthop->primary_key[i]);
1847                         arg->len += 2;
1848                 }
1849         } else {
1850                 sprintf(arg->buffer + arg->len,
1851                         "00000000000000000000000000000000");
1852                 arg->len += 32;
1853         }
1854         arg->len += sprintf(arg->buffer + arg->len,
1855                             " %08x %08x %08x %08x %8s\n",
1856                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1857                             rt->u.dst.__use, rt->rt6i_flags, 
1858                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1859         return 0;
1860 }
1861
1862 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1863 {
1864         struct rt6_proc_arg arg;
1865         arg.buffer = buffer;
1866         arg.offset = offset;
1867         arg.length = length;
1868         arg.skip = 0;
1869         arg.len = 0;
1870
1871         read_lock_bh(&rt6_lock);
1872         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1873         read_unlock_bh(&rt6_lock);
1874
1875         *start = buffer;
1876         if (offset)
1877                 *start += offset % RT6_INFO_LEN;
1878
1879         arg.len -= offset % RT6_INFO_LEN;
1880
1881         if (arg.len > length)
1882                 arg.len = length;
1883         if (arg.len < 0)
1884                 arg.len = 0;
1885
1886         return arg.len;
1887 }
1888
1889 extern struct rt6_statistics rt6_stats;
1890
1891 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1892 {
1893         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1894                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1895                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1896                       rt6_stats.fib_rt_cache,
1897                       atomic_read(&ip6_dst_ops.entries),
1898                       rt6_stats.fib_discarded_routes);
1899
1900         return 0;
1901 }
1902
1903 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1904 {
1905         return single_open(file, rt6_stats_seq_show, NULL);
1906 }
1907
1908 static struct file_operations rt6_stats_seq_fops = {
1909         .owner   = THIS_MODULE,
1910         .open    = rt6_stats_seq_open,
1911         .read    = seq_read,
1912         .llseek  = seq_lseek,
1913         .release = single_release,
1914 };
1915 #endif  /* CONFIG_PROC_FS */
1916
1917 #ifdef CONFIG_SYSCTL
1918
1919 static int flush_delay;
1920
1921 static
1922 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1923                               void __user *buffer, size_t *lenp)
1924 {
1925         if (write) {
1926                 proc_dointvec(ctl, write, filp, buffer, lenp);
1927                 if (flush_delay < 0)
1928                         flush_delay = 0;
1929                 fib6_run_gc((unsigned long)flush_delay);
1930                 return 0;
1931         } else
1932                 return -EINVAL;
1933 }
1934
1935 ctl_table ipv6_route_table[] = {
1936         {
1937                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
1938                 .procname       =       "flush",
1939                 .data           =       &flush_delay,
1940                 .maxlen         =       sizeof(int),
1941                 .mode           =       0644,
1942                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
1943         },
1944         {
1945                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
1946                 .procname       =       "gc_thresh",
1947                 .data           =       &ip6_dst_ops.gc_thresh,
1948                 .maxlen         =       sizeof(int),
1949                 .mode           =       0644,
1950                 .proc_handler   =       &proc_dointvec,
1951         },
1952         {
1953                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
1954                 .procname       =       "max_size",
1955                 .data           =       &ip6_rt_max_size,
1956                 .maxlen         =       sizeof(int),
1957                 .mode           =       0644,
1958                 .proc_handler   =       &proc_dointvec,
1959         },
1960         {
1961                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
1962                 .procname       =       "gc_min_interval",
1963                 .data           =       &ip6_rt_gc_min_interval,
1964                 .maxlen         =       sizeof(int),
1965                 .mode           =       0644,
1966                 .proc_handler   =       &proc_dointvec_jiffies,
1967                 .strategy       =       &sysctl_jiffies,
1968         },
1969         {
1970                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
1971                 .procname       =       "gc_timeout",
1972                 .data           =       &ip6_rt_gc_timeout,
1973                 .maxlen         =       sizeof(int),
1974                 .mode           =       0644,
1975                 .proc_handler   =       &proc_dointvec_jiffies,
1976                 .strategy       =       &sysctl_jiffies,
1977         },
1978         {
1979                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
1980                 .procname       =       "gc_interval",
1981                 .data           =       &ip6_rt_gc_interval,
1982                 .maxlen         =       sizeof(int),
1983                 .mode           =       0644,
1984                 .proc_handler   =       &proc_dointvec_jiffies,
1985                 .strategy       =       &sysctl_jiffies,
1986         },
1987         {
1988                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
1989                 .procname       =       "gc_elasticity",
1990                 .data           =       &ip6_rt_gc_elasticity,
1991                 .maxlen         =       sizeof(int),
1992                 .mode           =       0644,
1993                 .proc_handler   =       &proc_dointvec_jiffies,
1994                 .strategy       =       &sysctl_jiffies,
1995         },
1996         {
1997                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
1998                 .procname       =       "mtu_expires",
1999                 .data           =       &ip6_rt_mtu_expires,
2000                 .maxlen         =       sizeof(int),
2001                 .mode           =       0644,
2002                 .proc_handler   =       &proc_dointvec_jiffies,
2003                 .strategy       =       &sysctl_jiffies,
2004         },
2005         {
2006                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2007                 .procname       =       "min_adv_mss",
2008                 .data           =       &ip6_rt_min_advmss,
2009                 .maxlen         =       sizeof(int),
2010                 .mode           =       0644,
2011                 .proc_handler   =       &proc_dointvec_jiffies,
2012                 .strategy       =       &sysctl_jiffies,
2013         },
2014         { .ctl_name = 0 }
2015 };
2016
2017 #endif
2018
2019 void __init ip6_route_init(void)
2020 {
2021         struct proc_dir_entry *p;
2022
2023         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2024                                                      sizeof(struct rt6_info),
2025                                                      0, SLAB_HWCACHE_ALIGN,
2026                                                      NULL, NULL);
2027         if (!ip6_dst_ops.kmem_cachep)
2028                 panic("cannot create ip6_dst_cache");
2029
2030         fib6_init();
2031 #ifdef  CONFIG_PROC_FS
2032         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2033         if (p)
2034                 p->owner = THIS_MODULE;
2035
2036         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2037 #endif
2038 #ifdef CONFIG_XFRM
2039         xfrm6_init();
2040 #endif
2041 }
2042
2043 void __exit ip6_route_cleanup(void)
2044 {
2045 #ifdef CONFIG_PROC_FS
2046         proc_net_remove("ipv6_route");
2047         proc_net_remove("rt6_stats");
2048 #endif
2049 #ifdef CONFIG_XFRM
2050         xfrm6_fini();
2051 #endif
2052         rt6_ifdown(NULL);
2053         fib6_gc_cleanup();
2054         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2055 }