fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *      Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  *      Derived from the IP parts of dev.c 1.0.19
12  *              Authors:        Ross Biro
13  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *      Additional Authors:
17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *      Changes:
21  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
22  *                                      lists.
23  *              Cyrus Durgin:           updated for kmod
24  *              Matthias Andree:        in devinet_ioctl, compare label and
25  *                                      address (4.4BSD alias style support),
26  *                                      fall back to comparing just the label
27  *                                      if no match found.
28  */
29
30
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/sched.h>
39 #include <linux/string.h>
40 #include <linux/mm.h>
41 #include <linux/socket.h>
42 #include <linux/sockios.h>
43 #include <linux/in.h>
44 #include <linux/errno.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_addr.h>
47 #include <linux/if_ether.h>
48 #include <linux/inet.h>
49 #include <linux/netdevice.h>
50 #include <linux/etherdevice.h>
51 #include <linux/skbuff.h>
52 #include <linux/rtnetlink.h>
53 #include <linux/init.h>
54 #include <linux/notifier.h>
55 #include <linux/inetdevice.h>
56 #include <linux/igmp.h>
57 #ifdef CONFIG_SYSCTL
58 #include <linux/sysctl.h>
59 #endif
60 #include <linux/kmod.h>
61 #include <linux/vs_context.h>
62
63 #include <net/arp.h>
64 #include <net/ip.h>
65 #include <net/route.h>
66 #include <net/ip_fib.h>
67 #include <net/netlink.h>
68
69 struct ipv4_devconf ipv4_devconf = {
70         .accept_redirects = 1,
71         .send_redirects =  1,
72         .secure_redirects = 1,
73         .shared_media =   1,
74 };
75
76 static struct ipv4_devconf ipv4_devconf_dflt = {
77         .accept_redirects =  1,
78         .send_redirects =    1,
79         .secure_redirects =  1,
80         .shared_media =      1,
81         .accept_source_route = 1,
82 };
83
84 static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
85         [IFA_LOCAL]             = { .type = NLA_U32 },
86         [IFA_ADDRESS]           = { .type = NLA_U32 },
87         [IFA_BROADCAST]         = { .type = NLA_U32 },
88         [IFA_ANYCAST]           = { .type = NLA_U32 },
89         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
90 };
91
92 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
93
94 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
95 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
96                          int destroy);
97 #ifdef CONFIG_SYSCTL
98 static void devinet_sysctl_register(struct in_device *in_dev,
99                                     struct ipv4_devconf *p);
100 static void devinet_sysctl_unregister(struct ipv4_devconf *p);
101 #endif
102
103 /* Locks all the inet devices. */
104
105 static struct in_ifaddr *inet_alloc_ifa(void)
106 {
107         struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
108
109         if (ifa) {
110                 INIT_RCU_HEAD(&ifa->rcu_head);
111         }
112
113         return ifa;
114 }
115
116 static void inet_rcu_free_ifa(struct rcu_head *head)
117 {
118         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
119         if (ifa->ifa_dev)
120                 in_dev_put(ifa->ifa_dev);
121         kfree(ifa);
122 }
123
124 static inline void inet_free_ifa(struct in_ifaddr *ifa)
125 {
126         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
127 }
128
129 void in_dev_finish_destroy(struct in_device *idev)
130 {
131         struct net_device *dev = idev->dev;
132
133         BUG_TRAP(!idev->ifa_list);
134         BUG_TRAP(!idev->mc_list);
135 #ifdef NET_REFCNT_DEBUG
136         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
137                idev, dev ? dev->name : "NIL");
138 #endif
139         dev_put(dev);
140         if (!idev->dead)
141                 printk("Freeing alive in_device %p\n", idev);
142         else {
143                 kfree(idev);
144         }
145 }
146
147 struct in_device *inetdev_init(struct net_device *dev)
148 {
149         struct in_device *in_dev;
150
151         ASSERT_RTNL();
152
153         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
154         if (!in_dev)
155                 goto out;
156         INIT_RCU_HEAD(&in_dev->rcu_head);
157         memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
158         in_dev->cnf.sysctl = NULL;
159         in_dev->dev = dev;
160         if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
161                 goto out_kfree;
162         /* Reference in_dev->dev */
163         dev_hold(dev);
164 #ifdef CONFIG_SYSCTL
165         neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
166                               NET_IPV4_NEIGH, "ipv4", NULL, NULL);
167 #endif
168
169         /* Account for reference dev->ip_ptr (below) */
170         in_dev_hold(in_dev);
171
172 #ifdef CONFIG_SYSCTL
173         devinet_sysctl_register(in_dev, &in_dev->cnf);
174 #endif
175         ip_mc_init_dev(in_dev);
176         if (dev->flags & IFF_UP)
177                 ip_mc_up(in_dev);
178
179         /* we can receive as soon as ip_ptr is set -- do this last */
180         rcu_assign_pointer(dev->ip_ptr, in_dev);
181 out:
182         return in_dev;
183 out_kfree:
184         kfree(in_dev);
185         in_dev = NULL;
186         goto out;
187 }
188
189 static void in_dev_rcu_put(struct rcu_head *head)
190 {
191         struct in_device *idev = container_of(head, struct in_device, rcu_head);
192         in_dev_put(idev);
193 }
194
195 static void inetdev_destroy(struct in_device *in_dev)
196 {
197         struct in_ifaddr *ifa;
198         struct net_device *dev;
199
200         ASSERT_RTNL();
201
202         dev = in_dev->dev;
203         if (dev == &loopback_dev)
204                 return;
205
206         in_dev->dead = 1;
207
208         ip_mc_destroy_dev(in_dev);
209
210         while ((ifa = in_dev->ifa_list) != NULL) {
211                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
212                 inet_free_ifa(ifa);
213         }
214
215 #ifdef CONFIG_SYSCTL
216         devinet_sysctl_unregister(&in_dev->cnf);
217 #endif
218
219         dev->ip_ptr = NULL;
220
221 #ifdef CONFIG_SYSCTL
222         neigh_sysctl_unregister(in_dev->arp_parms);
223 #endif
224         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
225         arp_ifdown(dev);
226
227         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
228 }
229
230 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
231 {
232         rcu_read_lock();
233         for_primary_ifa(in_dev) {
234                 if (inet_ifa_match(a, ifa)) {
235                         if (!b || inet_ifa_match(b, ifa)) {
236                                 rcu_read_unlock();
237                                 return 1;
238                         }
239                 }
240         } endfor_ifa(in_dev);
241         rcu_read_unlock();
242         return 0;
243 }
244
/*
 * Remove the address *ifap from in_dev's address list.  Requires RTNL.
 *
 * in_dev:  device state owning the list
 * ifap:    pointer to the list link that currently points at the victim
 * destroy: when non-zero the victim is freed here and, if the list ends
 *          up empty, the whole in_device is destroyed as well
 * nlh/pid: passed through unchanged to rtmsg_ifa()
 *
 * When the victim is a primary address, every secondary on the same
 * mask/subnet is either deleted as well or, if
 * IN_DEV_PROMOTE_SECONDARIES(in_dev) is set, the first such secondary is
 * promoted to primary.
 */
245 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
246                          int destroy, struct nlmsghdr *nlh, u32 pid)
247 {
248         struct in_ifaddr *promote = NULL;
249         struct in_ifaddr *ifa, *ifa1 = *ifap;
250         struct in_ifaddr *last_prim = in_dev->ifa_list;
251         struct in_ifaddr *prev_prom = NULL;
252         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
253
254         ASSERT_RTNL();
255
256         /* 1. Deleting a primary ifaddr forces deletion of all its secondaries
257          * unless alias promotion is set
258          **/
259
260         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
261                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
262
263                 while ((ifa = *ifap1) != NULL) {
                        /* Remember the latest primary whose scope value is
                         * not below ifa1's; promotion re-links after it. */
264                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) && 
265                             ifa1->ifa_scope <= ifa->ifa_scope)
266                                 last_prim = ifa;
267
                        /* Skip entries that are not secondaries of ifa1's
                         * subnet; prev_prom trails the last skipped entry. */
268                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
269                             ifa1->ifa_mask != ifa->ifa_mask ||
270                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
271                                 ifap1 = &ifa->ifa_next;
272                                 prev_prom = ifa;
273                                 continue;
274                         }
275
276                         if (!do_promote) {
                                /* No promotion: unlink, announce and free
                                 * this secondary, then keep scanning. */
277                                 *ifap1 = ifa->ifa_next;
278
279                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
280                                 blocking_notifier_call_chain(&inetaddr_chain,
281                                                 NETDEV_DOWN, ifa);
282                                 inet_free_ifa(ifa);
283                         } else {
                                /* First matching secondary becomes the
                                 * promotion candidate; stop scanning. */
284                                 promote = ifa;
285                                 break;
286                         }
287                 }
288         }
289
290         /* 2. Unlink it */
291
292         *ifap = ifa1->ifa_next;
293
294         /* 3. Announce address deletion */
295
296         /* Send message first, then call notifier.
297            At first sight, FIB update triggered by notifier
298            will refer to already deleted ifaddr, that could confuse
299            netlink listeners. It is not true: look, gated sees
300            that route deleted and if it still thinks that ifaddr
301            is valid, it will try to restore deleted routes... Grr.
302            So that, this order is correct.
303          */
304         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
305         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
306
307         if (promote) {
308
                /* prev_prom is only set when at least one entry was skipped
                 * before reaching promote; in that case splice promote out
                 * of its position and re-link it right after last_prim. */
309                 if (prev_prom) {
310                         prev_prom->ifa_next = promote->ifa_next;
311                         promote->ifa_next = last_prim->ifa_next;
312                         last_prim->ifa_next = promote;
313                 }
314
315                 promote->ifa_flags &= ~IFA_F_SECONDARY;
316                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
317                 blocking_notifier_call_chain(&inetaddr_chain,
318                                 NETDEV_UP, promote);
                /* Call fib_add_ifaddr() for every address after promote that
                 * shares the deleted primary's mask and subnet. */
319                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
320                         if (ifa1->ifa_mask != ifa->ifa_mask ||
321                             !inet_ifa_match(ifa1->ifa_address, ifa))
322                                         continue;
323                         fib_add_ifaddr(ifa);
324                 }
325
326         }
327         if (destroy) {
328                 inet_free_ifa(ifa1);
329
                /* Last address gone: tear down the in_device as well. */
330                 if (!in_dev->ifa_list)
331                         inetdev_destroy(in_dev);
332         }
333 }
334
335 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
336                          int destroy)
337 {
338         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
339 }
340
341 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
342                              u32 pid)
343 {
344         struct in_device *in_dev = ifa->ifa_dev;
345         struct in_ifaddr *ifa1, **ifap, **last_primary;
346
347         ASSERT_RTNL();
348
349         if (!ifa->ifa_local) {
350                 inet_free_ifa(ifa);
351                 return 0;
352         }
353
354         ifa->ifa_flags &= ~IFA_F_SECONDARY;
355         last_primary = &in_dev->ifa_list;
356
357         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
358              ifap = &ifa1->ifa_next) {
359                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
360                     ifa->ifa_scope <= ifa1->ifa_scope)
361                         last_primary = &ifa1->ifa_next;
362                 if (ifa1->ifa_mask == ifa->ifa_mask &&
363                     inet_ifa_match(ifa1->ifa_address, ifa)) {
364                         if (ifa1->ifa_local == ifa->ifa_local) {
365                                 inet_free_ifa(ifa);
366                                 return -EEXIST;
367                         }
368                         if (ifa1->ifa_scope != ifa->ifa_scope) {
369                                 inet_free_ifa(ifa);
370                                 return -EINVAL;
371                         }
372                         ifa->ifa_flags |= IFA_F_SECONDARY;
373                 }
374         }
375
376         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
377                 net_srandom(ifa->ifa_local);
378                 ifap = last_primary;
379         }
380
381         ifa->ifa_next = *ifap;
382         *ifap = ifa;
383
384         /* Send message first, then call notifier.
385            Notifier will trigger FIB update, so that
386            listeners of netlink will know about new ifaddr */
387         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
388         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
389
390         return 0;
391 }
392
393 static int inet_insert_ifa(struct in_ifaddr *ifa)
394 {
395         return __inet_insert_ifa(ifa, NULL, 0);
396 }
397
398 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
399 {
400         struct in_device *in_dev = __in_dev_get_rtnl(dev);
401
402         ASSERT_RTNL();
403
404         if (!in_dev) {
405                 in_dev = inetdev_init(dev);
406                 if (!in_dev) {
407                         inet_free_ifa(ifa);
408                         return -ENOBUFS;
409                 }
410         }
411         if (ifa->ifa_dev != in_dev) {
412                 BUG_TRAP(!ifa->ifa_dev);
413                 in_dev_hold(in_dev);
414                 ifa->ifa_dev = in_dev;
415         }
416         if (LOOPBACK(ifa->ifa_local))
417                 ifa->ifa_scope = RT_SCOPE_HOST;
418         return inet_insert_ifa(ifa);
419 }
420
421 struct in_device *inetdev_by_index(int ifindex)
422 {
423         struct net_device *dev;
424         struct in_device *in_dev = NULL;
425         read_lock(&dev_base_lock);
426         dev = __dev_get_by_index(ifindex);
427         if (dev)
428                 in_dev = in_dev_get(dev);
429         read_unlock(&dev_base_lock);
430         return in_dev;
431 }
432
433 /* Called only from RTNL semaphored context. No locks. */
434
435 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
436                                     __be32 mask)
437 {
438         ASSERT_RTNL();
439
440         for_primary_ifa(in_dev) {
441                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
442                         return ifa;
443         } endfor_ifa(in_dev);
444         return NULL;
445 }
446
447 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
448 {
449         struct nlattr *tb[IFA_MAX+1];
450         struct in_device *in_dev;
451         struct ifaddrmsg *ifm;
452         struct in_ifaddr *ifa, **ifap;
453         int err = -EINVAL;
454
455         ASSERT_RTNL();
456
457         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
458         if (err < 0)
459                 goto errout;
460
461         ifm = nlmsg_data(nlh);
462         in_dev = inetdev_by_index(ifm->ifa_index);
463         if (in_dev == NULL) {
464                 err = -ENODEV;
465                 goto errout;
466         }
467
468         __in_dev_put(in_dev);
469
470         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
471              ifap = &ifa->ifa_next) {
472                 if (tb[IFA_LOCAL] &&
473                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
474                         continue;
475
476                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
477                         continue;
478
479                 if (tb[IFA_ADDRESS] &&
480                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
481                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
482                         continue;
483
484                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
485                 return 0;
486         }
487
488         err = -EADDRNOTAVAIL;
489 errout:
490         return err;
491 }
492
493 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
494 {
495         struct nlattr *tb[IFA_MAX+1];
496         struct in_ifaddr *ifa;
497         struct ifaddrmsg *ifm;
498         struct net_device *dev;
499         struct in_device *in_dev;
500         int err = -EINVAL;
501
502         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
503         if (err < 0)
504                 goto errout;
505
506         ifm = nlmsg_data(nlh);
507         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
508                 err = -EINVAL;
509                 goto errout;
510         }
511
512         dev = __dev_get_by_index(ifm->ifa_index);
513         if (dev == NULL) {
514                 err = -ENODEV;
515                 goto errout;
516         }
517
518         in_dev = __in_dev_get_rtnl(dev);
519         if (in_dev == NULL) {
520                 in_dev = inetdev_init(dev);
521                 if (in_dev == NULL) {
522                         err = -ENOBUFS;
523                         goto errout;
524                 }
525         }
526
527         ifa = inet_alloc_ifa();
528         if (ifa == NULL) {
529                 /*
530                  * A potential indev allocation can be left alive, it stays
531                  * assigned to its device and is destroy with it.
532                  */
533                 err = -ENOBUFS;
534                 goto errout;
535         }
536
537         in_dev_hold(in_dev);
538
539         if (tb[IFA_ADDRESS] == NULL)
540                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
541
542         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
543         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
544         ifa->ifa_flags = ifm->ifa_flags;
545         ifa->ifa_scope = ifm->ifa_scope;
546         ifa->ifa_dev = in_dev;
547
548         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
549         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
550
551         if (tb[IFA_BROADCAST])
552                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
553
554         if (tb[IFA_ANYCAST])
555                 ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
556
557         if (tb[IFA_LABEL])
558                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
559         else
560                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
561
562         return ifa;
563
564 errout:
565         return ERR_PTR(err);
566 }
567
568 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
569 {
570         struct in_ifaddr *ifa;
571
572         ASSERT_RTNL();
573
574         ifa = rtm_to_ifaddr(nlh);
575         if (IS_ERR(ifa))
576                 return PTR_ERR(ifa);
577
578         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
579 }
580
581 /*
582  *      Determine a default network mask, based on the IP address.
583  */
584
585 static __inline__ int inet_abc_len(__be32 addr)
586 {
587         int rc = -1;    /* Something else, probably a multicast. */
588
589         if (ZERONET(addr))
590                 rc = 0;
591         else {
592                 __u32 haddr = ntohl(addr);
593
594                 if (IN_CLASSA(haddr))
595                         rc = 8;
596                 else if (IN_CLASSB(haddr))
597                         rc = 16;
598                 else if (IN_CLASSC(haddr))
599                         rc = 24;
600         }
601
602         return rc;
603 }
604
605
/*
 * Handle the IPv4 interface-address ioctls.
 *
 * cmd: one of SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
 *      SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR or
 *      SIOCSIFNETMASK; anything else yields -EINVAL.
 * arg: user-space pointer to a struct ifreq; it is copied in at entry and,
 *      for the four "get" commands, copied back out with the result.
 *
 * All "set" commands require CAP_NET_ADMIN (-EACCES otherwise); the
 * address-setting ones additionally require sin_family == AF_INET.
 * The device lookup and all list manipulation run under rtnl_lock().
 *
 * Returns 0 on success or a negative errno (-EFAULT, -EACCES, -EINVAL,
 * -ENODEV, -EADDRNOTAVAIL, -ENOBUFS, or an error passed up from
 * dev_change_flags() / inet_set_ifa()).
 */
606 int devinet_ioctl(unsigned int cmd, void __user *arg)
607 {
608         struct ifreq ifr;
609         struct sockaddr_in sin_orig;
610         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
611         struct in_device *in_dev;
612         struct in_ifaddr **ifap = NULL;
613         struct in_ifaddr *ifa = NULL;
614         struct net_device *dev;
615         char *colon;
616         int ret = -EFAULT;
617         int tryaddrmatch = 0;
618
619         /*
620          *      Fetch the caller's info block into kernel space
621          */
622
623         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
624                 goto out;
        /* Force NUL termination of the user-supplied name. */
625         ifr.ifr_name[IFNAMSIZ - 1] = 0;
626
627         /* save original address for comparison */
628         memcpy(&sin_orig, sin, sizeof(*sin));
629
        /* Temporarily cut the name at ':' so that only the base device
         * name is used for module loading and device lookup; the colon
         * is put back after __dev_get_by_name() below. */
630         colon = strchr(ifr.ifr_name, ':');
631         if (colon)
632                 *colon = 0;
633
634 #ifdef CONFIG_KMOD
635         dev_load(ifr.ifr_name);
636 #endif
637
        /* First pass over cmd: permission and argument checks only. */
638         switch(cmd) {
639         case SIOCGIFADDR:       /* Get interface address */
640         case SIOCGIFBRDADDR:    /* Get the broadcast address */
641         case SIOCGIFDSTADDR:    /* Get the destination address */
642         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
643                 /* Note that these ioctls will not sleep,
644                    so that we do not impose a lock.
645                    One day we will be forced to put shlock here (I mean SMP)
646                  */
647                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
648                 memset(sin, 0, sizeof(*sin));
649                 sin->sin_family = AF_INET;
650                 break;
651
652         case SIOCSIFFLAGS:
653                 ret = -EACCES;
654                 if (!capable(CAP_NET_ADMIN))
655                         goto out;
656                 break;
657         case SIOCSIFADDR:       /* Set interface address (and family) */
658         case SIOCSIFBRDADDR:    /* Set the broadcast address */
659         case SIOCSIFDSTADDR:    /* Set the destination address */
660         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
661                 ret = -EACCES;
662                 if (!capable(CAP_NET_ADMIN))
663                         goto out;
664                 ret = -EINVAL;
665                 if (sin->sin_family != AF_INET)
666                         goto out;
667                 break;
668         default:
669                 ret = -EINVAL;
670                 goto out;
671         }
672
673         rtnl_lock();
674
675         ret = -ENODEV;
676         if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL)
677                 goto done;
678
        /* Restore the full "dev:alias" label for the comparisons below. */
679         if (colon)
680                 *colon = ':';
681
682         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
683                 struct nx_info *nxi = current->nx_info;
684                 int hide_netif = vx_flags(VXF_HIDE_NETIF, 0);
685
686                 if (tryaddrmatch) {
687                         /* Matthias Andree */
688                         /* compare label and address (4.4BSD style) */
689                         /* note: we only do this for a limited set of ioctls
690                            and only if the original address family was AF_INET.
691                            This is checked above. */
692                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
693                              ifap = &ifa->ifa_next) {
                                /* With VXF_HIDE_NETIF set, skip addresses
                                 * rejected by ifa_in_nx_info(). */
694                                 if (hide_netif && !ifa_in_nx_info(ifa, nxi))
695                                         continue;
696                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
697                                     sin_orig.sin_addr.s_addr ==
698                                                         ifa->ifa_address) {
699                                         break; /* found */
700                                 }
701                         }
702                 }
703                 /* we didn't get a match, maybe the application is
704                    4.3BSD-style and passed in junk so we fall back to
705                    comparing just the label */
706                 if (!ifa) {
707                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
708                              ifap = &ifa->ifa_next) {
709                                 if (hide_netif && !ifa_in_nx_info(ifa, nxi))
710                                         continue;
711                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
712                                         break;
713                         }
714                 }
715         }
716
        /* Only SIOCSIFADDR (which may create a new address) and
         * SIOCSIFFLAGS may continue without a matched address. */
717         ret = -EADDRNOTAVAIL;
718         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
719                 goto done;
720
        /* Second pass over cmd: perform the requested operation. */
721         switch(cmd) {
722         case SIOCGIFADDR:       /* Get interface address */
723                 sin->sin_addr.s_addr = ifa->ifa_local;
724                 goto rarok;
725
726         case SIOCGIFBRDADDR:    /* Get the broadcast address */
727                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
728                 goto rarok;
729
730         case SIOCGIFDSTADDR:    /* Get the destination address */
731                 sin->sin_addr.s_addr = ifa->ifa_address;
732                 goto rarok;
733
734         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
735                 sin->sin_addr.s_addr = ifa->ifa_mask;
736                 goto rarok;
737
738         case SIOCSIFFLAGS:
                /* On an alias ("dev:N") only clearing IFF_UP has an effect:
                 * it deletes that address.  Without a colon the flags are
                 * applied to the device itself. */
739                 if (colon) {
740                         ret = -EADDRNOTAVAIL;
741                         if (!ifa)
742                                 break;
743                         ret = 0;
744                         if (!(ifr.ifr_flags & IFF_UP))
745                                 inet_del_ifa(in_dev, ifap, 1);
746                         break;
747                 }
748                 ret = dev_change_flags(dev, ifr.ifr_flags);
749                 break;
750
751         case SIOCSIFADDR:       /* Set interface address (and family) */
752                 ret = -EINVAL;
753                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
754                         break;
755
756                 if (!ifa) {
757                         ret = -ENOBUFS;
758                         if ((ifa = inet_alloc_ifa()) == NULL)
759                                 break;
760                         if (colon)
761                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
762                         else
763                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
764                 } else {
765                         ret = 0;
766                         if (ifa->ifa_local == sin->sin_addr.s_addr)
767                                 break;
                        /* Unlink without freeing (destroy = 0); the same
                         * structure is re-inserted by inet_set_ifa(). */
768                         inet_del_ifa(in_dev, ifap, 0);
769                         ifa->ifa_broadcast = 0;
770                         ifa->ifa_anycast = 0;
771                 }
772
773                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
774
                /* Non point-to-point: classful default mask, plus a derived
                 * broadcast address on broadcast-capable devices.
                 * Point-to-point: host mask (/32). */
775                 if (!(dev->flags & IFF_POINTOPOINT)) {
776                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
777                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
778                         if ((dev->flags & IFF_BROADCAST) &&
779                             ifa->ifa_prefixlen < 31)
780                                 ifa->ifa_broadcast = ifa->ifa_address |
781                                                      ~ifa->ifa_mask;
782                 } else {
783                         ifa->ifa_prefixlen = 32;
784                         ifa->ifa_mask = inet_make_mask(32);
785                 }
786                 ret = inet_set_ifa(dev, ifa);
787                 break;
788
789         case SIOCSIFBRDADDR:    /* Set the broadcast address */
790                 ret = 0;
791                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
                        /* Remove, modify, re-insert.  The result of
                         * inet_insert_ifa() is not checked here. */
792                         inet_del_ifa(in_dev, ifap, 0);
793                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
794                         inet_insert_ifa(ifa);
795                 }
796                 break;
797
798         case SIOCSIFDSTADDR:    /* Set the destination address */
799                 ret = 0;
800                 if (ifa->ifa_address == sin->sin_addr.s_addr)
801                         break;
802                 ret = -EINVAL;
803                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
804                         break;
805                 ret = 0;
806                 inet_del_ifa(in_dev, ifap, 0);
807                 ifa->ifa_address = sin->sin_addr.s_addr;
808                 inet_insert_ifa(ifa);
809                 break;
810
811         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
812
813                 /*
814                  *      The mask we set must be legal.
815                  */
816                 ret = -EINVAL;
817                 if (bad_mask(sin->sin_addr.s_addr, 0))
818                         break;
819                 ret = 0;
820                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
821                         __be32 old_mask = ifa->ifa_mask;
822                         inet_del_ifa(in_dev, ifap, 0);
823                         ifa->ifa_mask = sin->sin_addr.s_addr;
824                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
825
826                         /* See if current broadcast address matches
827                          * with current netmask, then recalculate
828                          * the broadcast address. Otherwise it's a
829                          * funny address, so don't touch it since
830                          * the user seems to know what (s)he's doing...
831                          */
832                         if ((dev->flags & IFF_BROADCAST) &&
833                             (ifa->ifa_prefixlen < 31) &&
834                             (ifa->ifa_broadcast ==
835                              (ifa->ifa_local|~old_mask))) {
836                                 ifa->ifa_broadcast = (ifa->ifa_local |
837                                                       ~sin->sin_addr.s_addr);
838                         }
839                         inet_insert_ifa(ifa);
840                 }
841                 break;
842         }
843 done:
844         rtnl_unlock();
845 out:
846         return ret;
        /* "get" commands: RTNL is dropped before copying the result back
         * to user space. */
847 rarok:
848         rtnl_unlock();
849         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
850         goto out;
851 }
852
853 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
854 {
855         struct in_device *in_dev = __in_dev_get_rtnl(dev);
856         struct in_ifaddr *ifa;
857         struct ifreq ifr;
858         int done = 0;
859
860         if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
861                 goto out;
862
863         for (; ifa; ifa = ifa->ifa_next) {
864                 if (vx_flags(VXF_HIDE_NETIF, 0) &&
865                         !ifa_in_nx_info(ifa, current->nx_info))
866                         continue;
867                 if (!buf) {
868                         done += sizeof(ifr);
869                         continue;
870                 }
871                 if (len < (int) sizeof(ifr))
872                         break;
873                 memset(&ifr, 0, sizeof(struct ifreq));
874                 if (ifa->ifa_label)
875                         strcpy(ifr.ifr_name, ifa->ifa_label);
876                 else
877                         strcpy(ifr.ifr_name, dev->name);
878
879                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
880                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
881                                                                 ifa->ifa_local;
882
883                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
884                         done = -EFAULT;
885                         break;
886                 }
887                 buf  += sizeof(struct ifreq);
888                 len  -= sizeof(struct ifreq);
889                 done += sizeof(struct ifreq);
890         }
891 out:
892         return done;
893 }
894
895 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
896 {
897         __be32 addr = 0;
898         struct in_device *in_dev;
899
900         rcu_read_lock();
901         in_dev = __in_dev_get_rcu(dev);
902         if (!in_dev)
903                 goto no_in_dev;
904
905         for_primary_ifa(in_dev) {
906                 if (ifa->ifa_scope > scope)
907                         continue;
908                 if (!dst || inet_ifa_match(dst, ifa)) {
909                         addr = ifa->ifa_local;
910                         break;
911                 }
912                 if (!addr)
913                         addr = ifa->ifa_local;
914         } endfor_ifa(in_dev);
915 no_in_dev:
916         rcu_read_unlock();
917
918         if (addr)
919                 goto out;
920
921         /* Not loopback addresses on loopback should be preferred
922            in this case. It is importnat that lo is the first interface
923            in dev_base list.
924          */
925         read_lock(&dev_base_lock);
926         rcu_read_lock();
927         for (dev = dev_base; dev; dev = dev->next) {
928                 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
929                         continue;
930
931                 for_primary_ifa(in_dev) {
932                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
933                             ifa->ifa_scope <= scope) {
934                                 addr = ifa->ifa_local;
935                                 goto out_unlock_both;
936                         }
937                 } endfor_ifa(in_dev);
938         }
939 out_unlock_both:
940         read_unlock(&dev_base_lock);
941         rcu_read_unlock();
942 out:
943         return addr;
944 }
945
946 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
947                               __be32 local, int scope)
948 {
949         int same = 0;
950         __be32 addr = 0;
951
952         for_ifa(in_dev) {
953                 if (!addr &&
954                     (local == ifa->ifa_local || !local) &&
955                     ifa->ifa_scope <= scope) {
956                         addr = ifa->ifa_local;
957                         if (same)
958                                 break;
959                 }
960                 if (!same) {
961                         same = (!local || inet_ifa_match(local, ifa)) &&
962                                 (!dst || inet_ifa_match(dst, ifa));
963                         if (same && addr) {
964                                 if (local || !dst)
965                                         break;
966                                 /* Is the selected addr into dst subnet? */
967                                 if (inet_ifa_match(addr, ifa))
968                                         break;
969                                 /* No, then can we use new local src? */
970                                 if (ifa->ifa_scope <= scope) {
971                                         addr = ifa->ifa_local;
972                                         break;
973                                 }
974                                 /* search for large dst subnet for addr */
975                                 same = 0;
976                         }
977                 }
978         } endfor_ifa(in_dev);
979
980         return same? addr : 0;
981 }
982
983 /*
984  * Confirm that local IP address exists using wildcards:
985  * - dev: only on this interface, 0=any interface
986  * - dst: only in the same subnet as dst, 0=any dst
987  * - local: address, 0=autoselect the local address
988  * - scope: maximum allowed scope value for the local address
989  */
990 __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
991 {
992         __be32 addr = 0;
993         struct in_device *in_dev;
994
995         if (dev) {
996                 rcu_read_lock();
997                 if ((in_dev = __in_dev_get_rcu(dev)))
998                         addr = confirm_addr_indev(in_dev, dst, local, scope);
999                 rcu_read_unlock();
1000
1001                 return addr;
1002         }
1003
1004         read_lock(&dev_base_lock);
1005         rcu_read_lock();
1006         for (dev = dev_base; dev; dev = dev->next) {
1007                 if ((in_dev = __in_dev_get_rcu(dev))) {
1008                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1009                         if (addr)
1010                                 break;
1011                 }
1012         }
1013         rcu_read_unlock();
1014         read_unlock(&dev_base_lock);
1015
1016         return addr;
1017 }
1018
1019 /*
1020  *      Device notifier
1021  */
1022
1023 int register_inetaddr_notifier(struct notifier_block *nb)
1024 {
1025         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1026 }
1027
1028 int unregister_inetaddr_notifier(struct notifier_block *nb)
1029 {
1030         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1031 }
1032
1033 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1034  * alias numbering and to create unique labels if possible.
1035 */
1036 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1037
1038         struct in_ifaddr *ifa;
1039         int named = 0;
1040
1041         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 
1042                 char old[IFNAMSIZ], *dot; 
1043
1044                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1045                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 
1046                 if (named++ == 0)
1047                         continue;
1048                 dot = strchr(ifa->ifa_label, ':');
1049                 if (dot == NULL) { 
1050                         sprintf(old, ":%d", named); 
1051                         dot = old;
1052                 }
1053                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) { 
1054                         strcat(ifa->ifa_label, dot); 
1055                 } else { 
1056                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot); 
1057                 } 
1058         }       
1059
1060
1061 /* Called only under RTNL semaphore */
1062
1063 static int inetdev_event(struct notifier_block *this, unsigned long event,
1064                          void *ptr)
1065 {
1066         struct net_device *dev = ptr;
1067         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1068
1069         ASSERT_RTNL();
1070
1071         if (!in_dev) {
1072                 if (event == NETDEV_REGISTER && dev == &loopback_dev) {
1073                         in_dev = inetdev_init(dev);
1074                         if (!in_dev)
1075                                 panic("devinet: Failed to create loopback\n");
1076                         in_dev->cnf.no_xfrm = 1;
1077                         in_dev->cnf.no_policy = 1;
1078                 }
1079                 goto out;
1080         }
1081
1082         switch (event) {
1083         case NETDEV_REGISTER:
1084                 printk(KERN_DEBUG "inetdev_event: bug\n");
1085                 dev->ip_ptr = NULL;
1086                 break;
1087         case NETDEV_UP:
1088                 if (dev->mtu < 68)
1089                         break;
1090                 if (dev == &loopback_dev) {
1091                         struct in_ifaddr *ifa;
1092                         if ((ifa = inet_alloc_ifa()) != NULL) {
1093                                 ifa->ifa_local =
1094                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1095                                 ifa->ifa_prefixlen = 8;
1096                                 ifa->ifa_mask = inet_make_mask(8);
1097                                 in_dev_hold(in_dev);
1098                                 ifa->ifa_dev = in_dev;
1099                                 ifa->ifa_scope = RT_SCOPE_HOST;
1100                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1101                                 inet_insert_ifa(ifa);
1102                         }
1103                 }
1104                 ip_mc_up(in_dev);
1105                 break;
1106         case NETDEV_DOWN:
1107                 ip_mc_down(in_dev);
1108                 break;
1109         case NETDEV_CHANGEMTU:
1110                 if (dev->mtu >= 68)
1111                         break;
1112                 /* MTU falled under 68, disable IP */
1113         case NETDEV_UNREGISTER:
1114                 inetdev_destroy(in_dev);
1115                 break;
1116         case NETDEV_CHANGENAME:
1117                 /* Do not notify about label change, this event is
1118                  * not interesting to applications using netlink.
1119                  */
1120                 inetdev_changename(dev, in_dev);
1121
1122 #ifdef CONFIG_SYSCTL
1123                 devinet_sysctl_unregister(&in_dev->cnf);
1124                 neigh_sysctl_unregister(in_dev->arp_parms);
1125                 neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
1126                                       NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1127                 devinet_sysctl_register(in_dev, &in_dev->cnf);
1128 #endif
1129                 break;
1130         }
1131 out:
1132         return NOTIFY_DONE;
1133 }
1134
1135 static struct notifier_block ip_netdev_notifier = {
1136         .notifier_call =inetdev_event,
1137 };
1138
1139 static inline size_t inet_nlmsg_size(void)
1140 {
1141         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1142                + nla_total_size(4) /* IFA_ADDRESS */
1143                + nla_total_size(4) /* IFA_LOCAL */
1144                + nla_total_size(4) /* IFA_BROADCAST */
1145                + nla_total_size(4) /* IFA_ANYCAST */
1146                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1147 }
1148
1149 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1150                             u32 pid, u32 seq, int event, unsigned int flags)
1151 {
1152         struct ifaddrmsg *ifm;
1153         struct nlmsghdr  *nlh;
1154
1155         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1156         if (nlh == NULL)
1157                 return -ENOBUFS;
1158
1159         ifm = nlmsg_data(nlh);
1160         ifm->ifa_family = AF_INET;
1161         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1162         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1163         ifm->ifa_scope = ifa->ifa_scope;
1164         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1165
1166         if (ifa->ifa_address)
1167                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1168
1169         if (ifa->ifa_local)
1170                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1171
1172         if (ifa->ifa_broadcast)
1173                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1174
1175         if (ifa->ifa_anycast)
1176                 NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1177
1178         if (ifa->ifa_label[0])
1179                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1180
1181         return nlmsg_end(skb, nlh);
1182
1183 nla_put_failure:
1184         return nlmsg_cancel(skb, nlh);
1185 }
1186
1187 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1188 {
1189         int idx, ip_idx;
1190         struct net_device *dev;
1191         struct in_device *in_dev;
1192         struct in_ifaddr *ifa;
1193         struct sock *sk = skb->sk;
1194         int s_ip_idx, s_idx = cb->args[0];
1195
1196         s_ip_idx = ip_idx = cb->args[1];
1197         read_lock(&dev_base_lock);
1198         for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
1199                 if (idx < s_idx)
1200                         continue;
1201                 if (idx > s_idx)
1202                         s_ip_idx = 0;
1203                 rcu_read_lock();
1204                 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
1205                         rcu_read_unlock();
1206                         continue;
1207                 }
1208
1209                 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1210                      ifa = ifa->ifa_next, ip_idx++) {
1211                         if (sk && vx_info_flags(sk->sk_vx_info, VXF_HIDE_NETIF, 0) &&
1212                                 !ifa_in_nx_info(ifa, sk->sk_nx_info))
1213                                 continue;
1214                         if (ip_idx < s_ip_idx)
1215                                 continue;
1216                         if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1217                                              cb->nlh->nlmsg_seq,
1218                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1219                                 rcu_read_unlock();
1220                                 goto done;
1221                         }
1222                 }
1223                 rcu_read_unlock();
1224         }
1225
1226 done:
1227         read_unlock(&dev_base_lock);
1228         cb->args[0] = idx;
1229         cb->args[1] = ip_idx;
1230
1231         return skb->len;
1232 }
1233
1234 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1235                       u32 pid)
1236 {
1237         struct sk_buff *skb;
1238         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1239         int err = -ENOBUFS;
1240
1241         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1242         if (skb == NULL)
1243                 goto errout;
1244
1245         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1246         /* failure implies BUG in inet_nlmsg_size() */
1247         BUG_ON(err < 0);
1248
1249         err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1250 errout:
1251         if (err < 0)
1252                 rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
1253 }
1254
1255 static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
1256         [RTM_NEWADDR  - RTM_BASE] = { .doit     = inet_rtm_newaddr,     },
1257         [RTM_DELADDR  - RTM_BASE] = { .doit     = inet_rtm_deladdr,     },
1258         [RTM_GETADDR  - RTM_BASE] = { .dumpit   = inet_dump_ifaddr,     },
1259         [RTM_NEWROUTE - RTM_BASE] = { .doit     = inet_rtm_newroute,    },
1260         [RTM_DELROUTE - RTM_BASE] = { .doit     = inet_rtm_delroute,    },
1261         [RTM_GETROUTE - RTM_BASE] = { .doit     = inet_rtm_getroute,
1262                                       .dumpit   = inet_dump_fib,        },
1263 #ifdef CONFIG_IP_MULTIPLE_TABLES
1264         [RTM_GETRULE  - RTM_BASE] = { .dumpit   = fib4_rules_dump,      },
1265 #endif
1266 };
1267
1268 #ifdef CONFIG_SYSCTL
1269
1270 void inet_forward_change(void)
1271 {
1272         struct net_device *dev;
1273         int on = ipv4_devconf.forwarding;
1274
1275         ipv4_devconf.accept_redirects = !on;
1276         ipv4_devconf_dflt.forwarding = on;
1277
1278         read_lock(&dev_base_lock);
1279         for (dev = dev_base; dev; dev = dev->next) {
1280                 struct in_device *in_dev;
1281                 rcu_read_lock();
1282                 in_dev = __in_dev_get_rcu(dev);
1283                 if (in_dev)
1284                         in_dev->cnf.forwarding = on;
1285                 rcu_read_unlock();
1286         }
1287         read_unlock(&dev_base_lock);
1288
1289         rt_cache_flush(0);
1290 }
1291
1292 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1293                                   struct file* filp, void __user *buffer,
1294                                   size_t *lenp, loff_t *ppos)
1295 {
1296         int *valp = ctl->data;
1297         int val = *valp;
1298         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1299
1300         if (write && *valp != val) {
1301                 if (valp == &ipv4_devconf.forwarding)
1302                         inet_forward_change();
1303                 else if (valp != &ipv4_devconf_dflt.forwarding)
1304                         rt_cache_flush(0);
1305         }
1306
1307         return ret;
1308 }
1309
1310 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1311                          struct file* filp, void __user *buffer,
1312                          size_t *lenp, loff_t *ppos)
1313 {
1314         int *valp = ctl->data;
1315         int val = *valp;
1316         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1317
1318         if (write && *valp != val)
1319                 rt_cache_flush(0);
1320
1321         return ret;
1322 }
1323
1324 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1325                                   void __user *oldval, size_t __user *oldlenp,
1326                                   void __user *newval, size_t newlen)
1327 {
1328         int *valp = table->data;
1329         int new;
1330
1331         if (!newval || !newlen)
1332                 return 0;
1333
1334         if (newlen != sizeof(int))
1335                 return -EINVAL;
1336
1337         if (get_user(new, (int __user *)newval))
1338                 return -EFAULT;
1339
1340         if (new == *valp)
1341                 return 0;
1342
1343         if (oldval && oldlenp) {
1344                 size_t len;
1345
1346                 if (get_user(len, oldlenp))
1347                         return -EFAULT;
1348
1349                 if (len) {
1350                         if (len > table->maxlen)
1351                                 len = table->maxlen;
1352                         if (copy_to_user(oldval, valp, len))
1353                                 return -EFAULT;
1354                         if (put_user(len, oldlenp))
1355                                 return -EFAULT;
1356                 }
1357         }
1358
1359         *valp = new;
1360         rt_cache_flush(0);
1361         return 1;
1362 }
1363
1364
1365 static struct devinet_sysctl_table {
1366         struct ctl_table_header *sysctl_header;
1367         ctl_table               devinet_vars[__NET_IPV4_CONF_MAX];
1368         ctl_table               devinet_dev[2];
1369         ctl_table               devinet_conf_dir[2];
1370         ctl_table               devinet_proto_dir[2];
1371         ctl_table               devinet_root_dir[2];
1372 } devinet_sysctl = {
1373         .devinet_vars = {
1374                 {
1375                         .ctl_name       = NET_IPV4_CONF_FORWARDING,
1376                         .procname       = "forwarding",
1377                         .data           = &ipv4_devconf.forwarding,
1378                         .maxlen         = sizeof(int),
1379                         .mode           = 0644,
1380                         .proc_handler   = &devinet_sysctl_forward,
1381                 },
1382                 {
1383                         .ctl_name       = NET_IPV4_CONF_MC_FORWARDING,
1384                         .procname       = "mc_forwarding",
1385                         .data           = &ipv4_devconf.mc_forwarding,
1386                         .maxlen         = sizeof(int),
1387                         .mode           = 0444,
1388                         .proc_handler   = &proc_dointvec,
1389                 },
1390                 {
1391                         .ctl_name       = NET_IPV4_CONF_ACCEPT_REDIRECTS,
1392                         .procname       = "accept_redirects",
1393                         .data           = &ipv4_devconf.accept_redirects,
1394                         .maxlen         = sizeof(int),
1395                         .mode           = 0644,
1396                         .proc_handler   = &proc_dointvec,
1397                 },
1398                 {
1399                         .ctl_name       = NET_IPV4_CONF_SECURE_REDIRECTS,
1400                         .procname       = "secure_redirects",
1401                         .data           = &ipv4_devconf.secure_redirects,
1402                         .maxlen         = sizeof(int),
1403                         .mode           = 0644,
1404                         .proc_handler   = &proc_dointvec,
1405                 },
1406                 {
1407                         .ctl_name       = NET_IPV4_CONF_SHARED_MEDIA,
1408                         .procname       = "shared_media",
1409                         .data           = &ipv4_devconf.shared_media,
1410                         .maxlen         = sizeof(int),
1411                         .mode           = 0644,
1412                         .proc_handler   = &proc_dointvec,
1413                 },
1414                 {
1415                         .ctl_name       = NET_IPV4_CONF_RP_FILTER,
1416                         .procname       = "rp_filter",
1417                         .data           = &ipv4_devconf.rp_filter,
1418                         .maxlen         = sizeof(int),
1419                         .mode           = 0644,
1420                         .proc_handler   = &proc_dointvec,
1421                 },
1422                 {
1423                         .ctl_name       = NET_IPV4_CONF_SEND_REDIRECTS,
1424                         .procname       = "send_redirects",
1425                         .data           = &ipv4_devconf.send_redirects,
1426                         .maxlen         = sizeof(int),
1427                         .mode           = 0644,
1428                         .proc_handler   = &proc_dointvec,
1429                 },
1430                 {
1431                         .ctl_name       = NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE,
1432                         .procname       = "accept_source_route",
1433                         .data           = &ipv4_devconf.accept_source_route,
1434                         .maxlen         = sizeof(int),
1435                         .mode           = 0644,
1436                         .proc_handler   = &proc_dointvec,
1437                 },
1438                 {
1439                         .ctl_name       = NET_IPV4_CONF_PROXY_ARP,
1440                         .procname       = "proxy_arp",
1441                         .data           = &ipv4_devconf.proxy_arp,
1442                         .maxlen         = sizeof(int),
1443                         .mode           = 0644,
1444                         .proc_handler   = &proc_dointvec,
1445                 },
1446                 {
1447                         .ctl_name       = NET_IPV4_CONF_MEDIUM_ID,
1448                         .procname       = "medium_id",
1449                         .data           = &ipv4_devconf.medium_id,
1450                         .maxlen         = sizeof(int),
1451                         .mode           = 0644,
1452                         .proc_handler   = &proc_dointvec,
1453                 },
1454                 {
1455                         .ctl_name       = NET_IPV4_CONF_BOOTP_RELAY,
1456                         .procname       = "bootp_relay",
1457                         .data           = &ipv4_devconf.bootp_relay,
1458                         .maxlen         = sizeof(int),
1459                         .mode           = 0644,
1460                         .proc_handler   = &proc_dointvec,
1461                 },
1462                 {
1463                         .ctl_name       = NET_IPV4_CONF_LOG_MARTIANS,
1464                         .procname       = "log_martians",
1465                         .data           = &ipv4_devconf.log_martians,
1466                         .maxlen         = sizeof(int),
1467                         .mode           = 0644,
1468                         .proc_handler   = &proc_dointvec,
1469                 },
1470                 {
1471                         .ctl_name       = NET_IPV4_CONF_TAG,
1472                         .procname       = "tag",
1473                         .data           = &ipv4_devconf.tag,
1474                         .maxlen         = sizeof(int),
1475                         .mode           = 0644,
1476                         .proc_handler   = &proc_dointvec,
1477                 },
1478                 {
1479                         .ctl_name       = NET_IPV4_CONF_ARPFILTER,
1480                         .procname       = "arp_filter",
1481                         .data           = &ipv4_devconf.arp_filter,
1482                         .maxlen         = sizeof(int),
1483                         .mode           = 0644,
1484                         .proc_handler   = &proc_dointvec,
1485                 },
1486                 {
1487                         .ctl_name       = NET_IPV4_CONF_ARP_ANNOUNCE,
1488                         .procname       = "arp_announce",
1489                         .data           = &ipv4_devconf.arp_announce,
1490                         .maxlen         = sizeof(int),
1491                         .mode           = 0644,
1492                         .proc_handler   = &proc_dointvec,
1493                 },
1494                 {
1495                         .ctl_name       = NET_IPV4_CONF_ARP_IGNORE,
1496                         .procname       = "arp_ignore",
1497                         .data           = &ipv4_devconf.arp_ignore,
1498                         .maxlen         = sizeof(int),
1499                         .mode           = 0644,
1500                         .proc_handler   = &proc_dointvec,
1501                 },
1502                 {
1503                         .ctl_name       = NET_IPV4_CONF_ARP_ACCEPT,
1504                         .procname       = "arp_accept",
1505                         .data           = &ipv4_devconf.arp_accept,
1506                         .maxlen         = sizeof(int),
1507                         .mode           = 0644,
1508                         .proc_handler   = &proc_dointvec,
1509                 },
1510                 {
1511                         .ctl_name       = NET_IPV4_CONF_NOXFRM,
1512                         .procname       = "disable_xfrm",
1513                         .data           = &ipv4_devconf.no_xfrm,
1514                         .maxlen         = sizeof(int),
1515                         .mode           = 0644,
1516                         .proc_handler   = &ipv4_doint_and_flush,
1517                         .strategy       = &ipv4_doint_and_flush_strategy,
1518                 },
1519                 {
1520                         .ctl_name       = NET_IPV4_CONF_NOPOLICY,
1521                         .procname       = "disable_policy",
1522                         .data           = &ipv4_devconf.no_policy,
1523                         .maxlen         = sizeof(int),
1524                         .mode           = 0644,
1525                         .proc_handler   = &ipv4_doint_and_flush,
1526                         .strategy       = &ipv4_doint_and_flush_strategy,
1527                 },
1528                 {
1529                         .ctl_name       = NET_IPV4_CONF_FORCE_IGMP_VERSION,
1530                         .procname       = "force_igmp_version",
1531                         .data           = &ipv4_devconf.force_igmp_version,
1532                         .maxlen         = sizeof(int),
1533                         .mode           = 0644,
1534                         .proc_handler   = &ipv4_doint_and_flush,
1535                         .strategy       = &ipv4_doint_and_flush_strategy,
1536                 },
1537                 {
1538                         .ctl_name       = NET_IPV4_CONF_PROMOTE_SECONDARIES,
1539                         .procname       = "promote_secondaries",
1540                         .data           = &ipv4_devconf.promote_secondaries,
1541                         .maxlen         = sizeof(int),
1542                         .mode           = 0644,
1543                         .proc_handler   = &ipv4_doint_and_flush,
1544                         .strategy       = &ipv4_doint_and_flush_strategy,
1545                 },
1546         },
1547         .devinet_dev = {
1548                 {
1549                         .ctl_name       = NET_PROTO_CONF_ALL,
1550                         .procname       = "all",
1551                         .mode           = 0555,
1552                         .child          = devinet_sysctl.devinet_vars,
1553                 },
1554         },
1555         .devinet_conf_dir = {
1556                 {
1557                         .ctl_name       = NET_IPV4_CONF,
1558                         .procname       = "conf",
1559                         .mode           = 0555,
1560                         .child          = devinet_sysctl.devinet_dev,
1561                 },
1562         },
1563         .devinet_proto_dir = {
1564                 {
1565                         .ctl_name       = NET_IPV4,
1566                         .procname       = "ipv4",
1567                         .mode           = 0555,
1568                         .child          = devinet_sysctl.devinet_conf_dir,
1569                 },
1570         },
1571         .devinet_root_dir = {
1572                 {
1573                         .ctl_name       = CTL_NET,
1574                         .procname       = "net",
1575                         .mode           = 0555,
1576                         .child          = devinet_sysctl.devinet_proto_dir,
1577                 },
1578         },
1579 };
1580
1581 static void devinet_sysctl_register(struct in_device *in_dev,
1582                                     struct ipv4_devconf *p)
1583 {
1584         int i;
1585         struct net_device *dev = in_dev ? in_dev->dev : NULL;
1586         struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t),
1587                                                  GFP_KERNEL);
1588         char *dev_name = NULL;
1589
1590         if (!t)
1591                 return;
1592         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1593                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1594                 t->devinet_vars[i].de = NULL;
1595         }
1596
1597         if (dev) {
1598                 dev_name = dev->name; 
1599                 t->devinet_dev[0].ctl_name = dev->ifindex;
1600         } else {
1601                 dev_name = "default";
1602                 t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
1603         }
1604
1605         /* 
1606          * Make a copy of dev_name, because '.procname' is regarded as const 
1607          * by sysctl and we wouldn't want anyone to change it under our feet
1608          * (see SIOCSIFNAME).
1609          */     
1610         dev_name = kstrdup(dev_name, GFP_KERNEL);
1611         if (!dev_name)
1612             goto free;
1613
1614         t->devinet_dev[0].procname    = dev_name;
1615         t->devinet_dev[0].child       = t->devinet_vars;
1616         t->devinet_dev[0].de          = NULL;
1617         t->devinet_conf_dir[0].child  = t->devinet_dev;
1618         t->devinet_conf_dir[0].de     = NULL;
1619         t->devinet_proto_dir[0].child = t->devinet_conf_dir;
1620         t->devinet_proto_dir[0].de    = NULL;
1621         t->devinet_root_dir[0].child  = t->devinet_proto_dir;
1622         t->devinet_root_dir[0].de     = NULL;
1623
1624         t->sysctl_header = register_sysctl_table(t->devinet_root_dir, 0);
1625         if (!t->sysctl_header)
1626             goto free_procname;
1627
1628         p->sysctl = t;
1629         return;
1630
1631         /* error path */
1632  free_procname:
1633         kfree(dev_name);
1634  free:
1635         kfree(t);
1636         return;
1637 }
1638
1639 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
1640 {
1641         if (p->sysctl) {
1642                 struct devinet_sysctl_table *t = p->sysctl;
1643                 p->sysctl = NULL;
1644                 unregister_sysctl_table(t->sysctl_header);
1645                 kfree(t->devinet_dev[0].procname);
1646                 kfree(t);
1647         }
1648 }
1649 #endif
1650
1651 void __init devinet_init(void)
1652 {
1653         register_gifconf(PF_INET, inet_gifconf);
1654         register_netdevice_notifier(&ip_netdev_notifier);
1655         rtnetlink_links[PF_INET] = inet_rtnetlink_table;
1656 #ifdef CONFIG_SYSCTL
1657         devinet_sysctl.sysctl_header =
1658                 register_sysctl_table(devinet_sysctl.devinet_root_dir, 0);
1659         devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
1660 #endif
1661 }
1662
1663 EXPORT_SYMBOL(in_dev_finish_destroy);
1664 EXPORT_SYMBOL(inet_select_addr);
1665 EXPORT_SYMBOL(inetdev_by_index);
1666 EXPORT_SYMBOL(register_inetaddr_notifier);
1667 EXPORT_SYMBOL(unregister_inetaddr_notifier);