2 * NET3 IP device support routines.
4 * Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
11 * Derived from the IP parts of dev.c 1.0.19
13 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 * Mark Evans, <evansmp@uhura.aston.ac.uk>
17 * Alan Cox, <gw4pts@gw4pts.ampr.org>
18 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
21 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
23 * Cyrus Durgin: updated for kmod
24 * Matthias Andree: in devinet_ioctl, compare label and
25 * address (4.4BSD alias style support),
26 * fall back to comparing just the label
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/sched.h>
39 #include <linux/string.h>
41 #include <linux/socket.h>
42 #include <linux/sockios.h>
44 #include <linux/errno.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_addr.h>
47 #include <linux/if_ether.h>
48 #include <linux/inet.h>
49 #include <linux/netdevice.h>
50 #include <linux/etherdevice.h>
51 #include <linux/skbuff.h>
52 #include <linux/rtnetlink.h>
53 #include <linux/init.h>
54 #include <linux/notifier.h>
55 #include <linux/inetdevice.h>
56 #include <linux/igmp.h>
58 #include <linux/sysctl.h>
60 #include <linux/kmod.h>
61 #include <linux/vs_context.h>
65 #include <net/route.h>
66 #include <net/ip_fib.h>
67 #include <net/netlink.h>
69 struct ipv4_devconf ipv4_devconf = {
70 .accept_redirects = 1,
72 .secure_redirects = 1,
76 static struct ipv4_devconf ipv4_devconf_dflt = {
77 .accept_redirects = 1,
79 .secure_redirects = 1,
81 .accept_source_route = 1,
84 static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
85 [IFA_LOCAL] = { .type = NLA_U32 },
86 [IFA_ADDRESS] = { .type = NLA_U32 },
87 [IFA_BROADCAST] = { .type = NLA_U32 },
88 [IFA_ANYCAST] = { .type = NLA_U32 },
89 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
92 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
94 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
95 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
98 static void devinet_sysctl_register(struct in_device *in_dev,
99 struct ipv4_devconf *p);
100 static void devinet_sysctl_unregister(struct ipv4_devconf *p);
103 /* Locks all the inet devices. */
105 static struct in_ifaddr *inet_alloc_ifa(void)
107 struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
110 INIT_RCU_HEAD(&ifa->rcu_head);
116 static void inet_rcu_free_ifa(struct rcu_head *head)
118 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
120 in_dev_put(ifa->ifa_dev);
124 static inline void inet_free_ifa(struct in_ifaddr *ifa)
126 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
129 void in_dev_finish_destroy(struct in_device *idev)
131 struct net_device *dev = idev->dev;
133 BUG_TRAP(!idev->ifa_list);
134 BUG_TRAP(!idev->mc_list);
135 #ifdef NET_REFCNT_DEBUG
136 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
137 idev, dev ? dev->name : "NIL");
141 printk("Freeing alive in_device %p\n", idev);
147 struct in_device *inetdev_init(struct net_device *dev)
149 struct in_device *in_dev;
153 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
156 INIT_RCU_HEAD(&in_dev->rcu_head);
157 memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
158 in_dev->cnf.sysctl = NULL;
160 if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
162 /* Reference in_dev->dev */
165 neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
166 NET_IPV4_NEIGH, "ipv4", NULL, NULL);
169 /* Account for reference dev->ip_ptr (below) */
173 devinet_sysctl_register(in_dev, &in_dev->cnf);
175 ip_mc_init_dev(in_dev);
176 if (dev->flags & IFF_UP)
179 /* we can receive as soon as ip_ptr is set -- do this last */
180 rcu_assign_pointer(dev->ip_ptr, in_dev);
189 static void in_dev_rcu_put(struct rcu_head *head)
191 struct in_device *idev = container_of(head, struct in_device, rcu_head);
195 static void inetdev_destroy(struct in_device *in_dev)
197 struct in_ifaddr *ifa;
198 struct net_device *dev;
203 if (dev == &loopback_dev)
208 ip_mc_destroy_dev(in_dev);
210 while ((ifa = in_dev->ifa_list) != NULL) {
211 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
216 devinet_sysctl_unregister(&in_dev->cnf);
222 neigh_sysctl_unregister(in_dev->arp_parms);
224 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
227 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
230 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
233 for_primary_ifa(in_dev) {
234 if (inet_ifa_match(a, ifa)) {
235 if (!b || inet_ifa_match(b, ifa)) {
240 } endfor_ifa(in_dev);
245 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
246 int destroy, struct nlmsghdr *nlh, u32 pid)
248 struct in_ifaddr *promote = NULL;
249 struct in_ifaddr *ifa, *ifa1 = *ifap;
250 struct in_ifaddr *last_prim = in_dev->ifa_list;
251 struct in_ifaddr *prev_prom = NULL;
252 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
256 /* 1. Deleting primary ifaddr forces deletion of all secondaries
257 * unless alias promotion is set
260 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
261 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
263 while ((ifa = *ifap1) != NULL) {
264 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
265 ifa1->ifa_scope <= ifa->ifa_scope)
268 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
269 ifa1->ifa_mask != ifa->ifa_mask ||
270 !inet_ifa_match(ifa1->ifa_address, ifa)) {
271 ifap1 = &ifa->ifa_next;
277 *ifap1 = ifa->ifa_next;
279 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
280 blocking_notifier_call_chain(&inetaddr_chain,
292 *ifap = ifa1->ifa_next;
294 /* 3. Announce address deletion */
296 /* Send message first, then call notifier.
297 At first sight, FIB update triggered by notifier
298 will refer to already deleted ifaddr, that could confuse
299 netlink listeners. It is not true: look, gated sees
300 that route deleted and if it still thinks that ifaddr
301 is valid, it will try to restore deleted routes... Grr.
302 So this order is correct.
304 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
305 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
310 prev_prom->ifa_next = promote->ifa_next;
311 promote->ifa_next = last_prim->ifa_next;
312 last_prim->ifa_next = promote;
315 promote->ifa_flags &= ~IFA_F_SECONDARY;
316 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
317 blocking_notifier_call_chain(&inetaddr_chain,
319 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
320 if (ifa1->ifa_mask != ifa->ifa_mask ||
321 !inet_ifa_match(ifa1->ifa_address, ifa))
330 if (!in_dev->ifa_list)
331 inetdev_destroy(in_dev);
335 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
338 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
341 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
344 struct in_device *in_dev = ifa->ifa_dev;
345 struct in_ifaddr *ifa1, **ifap, **last_primary;
349 if (!ifa->ifa_local) {
354 ifa->ifa_flags &= ~IFA_F_SECONDARY;
355 last_primary = &in_dev->ifa_list;
357 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
358 ifap = &ifa1->ifa_next) {
359 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
360 ifa->ifa_scope <= ifa1->ifa_scope)
361 last_primary = &ifa1->ifa_next;
362 if (ifa1->ifa_mask == ifa->ifa_mask &&
363 inet_ifa_match(ifa1->ifa_address, ifa)) {
364 if (ifa1->ifa_local == ifa->ifa_local) {
368 if (ifa1->ifa_scope != ifa->ifa_scope) {
372 ifa->ifa_flags |= IFA_F_SECONDARY;
376 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
377 net_srandom(ifa->ifa_local);
381 ifa->ifa_next = *ifap;
384 /* Send message first, then call notifier.
385 Notifier will trigger FIB update, so that
386 listeners of netlink will know about new ifaddr */
387 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
388 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
393 static int inet_insert_ifa(struct in_ifaddr *ifa)
395 return __inet_insert_ifa(ifa, NULL, 0);
398 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
400 struct in_device *in_dev = __in_dev_get_rtnl(dev);
405 in_dev = inetdev_init(dev);
411 if (ifa->ifa_dev != in_dev) {
412 BUG_TRAP(!ifa->ifa_dev);
414 ifa->ifa_dev = in_dev;
416 if (LOOPBACK(ifa->ifa_local))
417 ifa->ifa_scope = RT_SCOPE_HOST;
418 return inet_insert_ifa(ifa);
421 struct in_device *inetdev_by_index(int ifindex)
423 struct net_device *dev;
424 struct in_device *in_dev = NULL;
425 read_lock(&dev_base_lock);
426 dev = __dev_get_by_index(ifindex);
428 in_dev = in_dev_get(dev);
429 read_unlock(&dev_base_lock);
433 /* Called only from RTNL semaphored context. No locks. */
435 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
440 for_primary_ifa(in_dev) {
441 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
443 } endfor_ifa(in_dev);
447 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
449 struct nlattr *tb[IFA_MAX+1];
450 struct in_device *in_dev;
451 struct ifaddrmsg *ifm;
452 struct in_ifaddr *ifa, **ifap;
457 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
461 ifm = nlmsg_data(nlh);
462 in_dev = inetdev_by_index(ifm->ifa_index);
463 if (in_dev == NULL) {
468 __in_dev_put(in_dev);
470 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
471 ifap = &ifa->ifa_next) {
473 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
476 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
479 if (tb[IFA_ADDRESS] &&
480 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
481 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
484 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
488 err = -EADDRNOTAVAIL;
493 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
495 struct nlattr *tb[IFA_MAX+1];
496 struct in_ifaddr *ifa;
497 struct ifaddrmsg *ifm;
498 struct net_device *dev;
499 struct in_device *in_dev;
502 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
506 ifm = nlmsg_data(nlh);
507 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
512 dev = __dev_get_by_index(ifm->ifa_index);
518 in_dev = __in_dev_get_rtnl(dev);
519 if (in_dev == NULL) {
520 in_dev = inetdev_init(dev);
521 if (in_dev == NULL) {
527 ifa = inet_alloc_ifa();
530 * A potential indev allocation can be left alive, it stays
531 * assigned to its device and is destroyed with it.
539 if (tb[IFA_ADDRESS] == NULL)
540 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
542 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
543 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
544 ifa->ifa_flags = ifm->ifa_flags;
545 ifa->ifa_scope = ifm->ifa_scope;
546 ifa->ifa_dev = in_dev;
548 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
549 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
551 if (tb[IFA_BROADCAST])
552 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
555 ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
558 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
560 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
568 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
570 struct in_ifaddr *ifa;
574 ifa = rtm_to_ifaddr(nlh);
578 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
582 * Determine a default network mask, based on the IP address.
585 static __inline__ int inet_abc_len(__be32 addr)
587 int rc = -1; /* Something else, probably a multicast. */
592 __u32 haddr = ntohl(addr);
594 if (IN_CLASSA(haddr))
596 else if (IN_CLASSB(haddr))
598 else if (IN_CLASSC(haddr))
606 int devinet_ioctl(unsigned int cmd, void __user *arg)
609 struct sockaddr_in sin_orig;
610 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
611 struct in_device *in_dev;
612 struct in_ifaddr **ifap = NULL;
613 struct in_ifaddr *ifa = NULL;
614 struct net_device *dev;
617 int tryaddrmatch = 0;
620 * Fetch the caller's info block into kernel space
623 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
625 ifr.ifr_name[IFNAMSIZ - 1] = 0;
627 /* save original address for comparison */
628 memcpy(&sin_orig, sin, sizeof(*sin));
630 colon = strchr(ifr.ifr_name, ':');
635 dev_load(ifr.ifr_name);
639 case SIOCGIFADDR: /* Get interface address */
640 case SIOCGIFBRDADDR: /* Get the broadcast address */
641 case SIOCGIFDSTADDR: /* Get the destination address */
642 case SIOCGIFNETMASK: /* Get the netmask for the interface */
643 /* Note that these ioctls will not sleep,
644 so that we do not impose a lock.
645 One day we will be forced to put shlock here (I mean SMP)
647 tryaddrmatch = (sin_orig.sin_family == AF_INET);
648 memset(sin, 0, sizeof(*sin));
649 sin->sin_family = AF_INET;
654 if (!capable(CAP_NET_ADMIN))
657 case SIOCSIFADDR: /* Set interface address (and family) */
658 case SIOCSIFBRDADDR: /* Set the broadcast address */
659 case SIOCSIFDSTADDR: /* Set the destination address */
660 case SIOCSIFNETMASK: /* Set the netmask for the interface */
662 if (!capable(CAP_NET_ADMIN))
665 if (sin->sin_family != AF_INET)
676 if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL)
682 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
683 struct nx_info *nxi = current->nx_info;
684 int hide_netif = vx_flags(VXF_HIDE_NETIF, 0);
687 /* Matthias Andree */
688 /* compare label and address (4.4BSD style) */
689 /* note: we only do this for a limited set of ioctls
690 and only if the original address family was AF_INET.
691 This is checked above. */
692 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
693 ifap = &ifa->ifa_next) {
694 if (hide_netif && !ifa_in_nx_info(ifa, nxi))
696 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
697 sin_orig.sin_addr.s_addr ==
703 /* we didn't get a match, maybe the application is
704 4.3BSD-style and passed in junk so we fall back to
705 comparing just the label */
707 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
708 ifap = &ifa->ifa_next) {
709 if (hide_netif && !ifa_in_nx_info(ifa, nxi))
711 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
717 ret = -EADDRNOTAVAIL;
718 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
722 case SIOCGIFADDR: /* Get interface address */
723 sin->sin_addr.s_addr = ifa->ifa_local;
726 case SIOCGIFBRDADDR: /* Get the broadcast address */
727 sin->sin_addr.s_addr = ifa->ifa_broadcast;
730 case SIOCGIFDSTADDR: /* Get the destination address */
731 sin->sin_addr.s_addr = ifa->ifa_address;
734 case SIOCGIFNETMASK: /* Get the netmask for the interface */
735 sin->sin_addr.s_addr = ifa->ifa_mask;
740 ret = -EADDRNOTAVAIL;
744 if (!(ifr.ifr_flags & IFF_UP))
745 inet_del_ifa(in_dev, ifap, 1);
748 ret = dev_change_flags(dev, ifr.ifr_flags);
751 case SIOCSIFADDR: /* Set interface address (and family) */
753 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
758 if ((ifa = inet_alloc_ifa()) == NULL)
761 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
763 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
766 if (ifa->ifa_local == sin->sin_addr.s_addr)
768 inet_del_ifa(in_dev, ifap, 0);
769 ifa->ifa_broadcast = 0;
770 ifa->ifa_anycast = 0;
773 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
775 if (!(dev->flags & IFF_POINTOPOINT)) {
776 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
777 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
778 if ((dev->flags & IFF_BROADCAST) &&
779 ifa->ifa_prefixlen < 31)
780 ifa->ifa_broadcast = ifa->ifa_address |
783 ifa->ifa_prefixlen = 32;
784 ifa->ifa_mask = inet_make_mask(32);
786 ret = inet_set_ifa(dev, ifa);
789 case SIOCSIFBRDADDR: /* Set the broadcast address */
791 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
792 inet_del_ifa(in_dev, ifap, 0);
793 ifa->ifa_broadcast = sin->sin_addr.s_addr;
794 inet_insert_ifa(ifa);
798 case SIOCSIFDSTADDR: /* Set the destination address */
800 if (ifa->ifa_address == sin->sin_addr.s_addr)
803 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
806 inet_del_ifa(in_dev, ifap, 0);
807 ifa->ifa_address = sin->sin_addr.s_addr;
808 inet_insert_ifa(ifa);
811 case SIOCSIFNETMASK: /* Set the netmask for the interface */
814 * The mask we set must be legal.
817 if (bad_mask(sin->sin_addr.s_addr, 0))
820 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
821 __be32 old_mask = ifa->ifa_mask;
822 inet_del_ifa(in_dev, ifap, 0);
823 ifa->ifa_mask = sin->sin_addr.s_addr;
824 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
826 /* See if current broadcast address matches
827 * with current netmask, then recalculate
828 * the broadcast address. Otherwise it's a
829 * funny address, so don't touch it since
830 * the user seems to know what (s)he's doing...
832 if ((dev->flags & IFF_BROADCAST) &&
833 (ifa->ifa_prefixlen < 31) &&
834 (ifa->ifa_broadcast ==
835 (ifa->ifa_local|~old_mask))) {
836 ifa->ifa_broadcast = (ifa->ifa_local |
837 ~sin->sin_addr.s_addr);
839 inet_insert_ifa(ifa);
849 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
853 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
855 struct in_device *in_dev = __in_dev_get_rtnl(dev);
856 struct in_ifaddr *ifa;
860 if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
863 for (; ifa; ifa = ifa->ifa_next) {
864 if (vx_flags(VXF_HIDE_NETIF, 0) &&
865 !ifa_in_nx_info(ifa, current->nx_info))
871 if (len < (int) sizeof(ifr))
873 memset(&ifr, 0, sizeof(struct ifreq));
875 strcpy(ifr.ifr_name, ifa->ifa_label);
877 strcpy(ifr.ifr_name, dev->name);
879 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
880 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
883 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
887 buf += sizeof(struct ifreq);
888 len -= sizeof(struct ifreq);
889 done += sizeof(struct ifreq);
895 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
898 struct in_device *in_dev;
901 in_dev = __in_dev_get_rcu(dev);
905 for_primary_ifa(in_dev) {
906 if (ifa->ifa_scope > scope)
908 if (!dst || inet_ifa_match(dst, ifa)) {
909 addr = ifa->ifa_local;
913 addr = ifa->ifa_local;
914 } endfor_ifa(in_dev);
921 /* Non-loopback addresses on loopback should be preferred
922 in this case. It is important that lo is the first interface
925 read_lock(&dev_base_lock);
927 for (dev = dev_base; dev; dev = dev->next) {
928 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
931 for_primary_ifa(in_dev) {
932 if (ifa->ifa_scope != RT_SCOPE_LINK &&
933 ifa->ifa_scope <= scope) {
934 addr = ifa->ifa_local;
935 goto out_unlock_both;
937 } endfor_ifa(in_dev);
940 read_unlock(&dev_base_lock);
946 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
947 __be32 local, int scope)
954 (local == ifa->ifa_local || !local) &&
955 ifa->ifa_scope <= scope) {
956 addr = ifa->ifa_local;
961 same = (!local || inet_ifa_match(local, ifa)) &&
962 (!dst || inet_ifa_match(dst, ifa));
966 /* Is the selected addr in the dst subnet? */
967 if (inet_ifa_match(addr, ifa))
969 /* No, then can we use new local src? */
970 if (ifa->ifa_scope <= scope) {
971 addr = ifa->ifa_local;
974 /* search for large dst subnet for addr */
978 } endfor_ifa(in_dev);
980 return same? addr : 0;
984 * Confirm that local IP address exists using wildcards:
985 * - dev: only on this interface, 0=any interface
986 * - dst: only in the same subnet as dst, 0=any dst
987 * - local: address, 0=autoselect the local address
988 * - scope: maximum allowed scope value for the local address
990 __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
993 struct in_device *in_dev;
997 if ((in_dev = __in_dev_get_rcu(dev)))
998 addr = confirm_addr_indev(in_dev, dst, local, scope);
1004 read_lock(&dev_base_lock);
1006 for (dev = dev_base; dev; dev = dev->next) {
1007 if ((in_dev = __in_dev_get_rcu(dev))) {
1008 addr = confirm_addr_indev(in_dev, dst, local, scope);
1014 read_unlock(&dev_base_lock);
1023 int register_inetaddr_notifier(struct notifier_block *nb)
1025 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1028 int unregister_inetaddr_notifier(struct notifier_block *nb)
1030 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1033 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1034 * alias numbering and to create unique labels if possible.
1036 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1038 struct in_ifaddr *ifa;
1041 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1042 char old[IFNAMSIZ], *dot;
1044 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1045 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1048 dot = strchr(ifa->ifa_label, ':');
1050 sprintf(old, ":%d", named);
1053 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1054 strcat(ifa->ifa_label, dot);
1056 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1061 /* Called only under RTNL semaphore */
1063 static int inetdev_event(struct notifier_block *this, unsigned long event,
1066 struct net_device *dev = ptr;
1067 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1072 if (event == NETDEV_REGISTER && dev == &loopback_dev) {
1073 in_dev = inetdev_init(dev);
1075 panic("devinet: Failed to create loopback\n");
1076 in_dev->cnf.no_xfrm = 1;
1077 in_dev->cnf.no_policy = 1;
1083 case NETDEV_REGISTER:
1084 printk(KERN_DEBUG "inetdev_event: bug\n");
1090 if (dev == &loopback_dev) {
1091 struct in_ifaddr *ifa;
1092 if ((ifa = inet_alloc_ifa()) != NULL) {
1094 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1095 ifa->ifa_prefixlen = 8;
1096 ifa->ifa_mask = inet_make_mask(8);
1097 in_dev_hold(in_dev);
1098 ifa->ifa_dev = in_dev;
1099 ifa->ifa_scope = RT_SCOPE_HOST;
1100 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1101 inet_insert_ifa(ifa);
1109 case NETDEV_CHANGEMTU:
1112 /* MTU fell below 68, disable IP */
1113 case NETDEV_UNREGISTER:
1114 inetdev_destroy(in_dev);
1116 case NETDEV_CHANGENAME:
1117 /* Do not notify about label change, this event is
1118 * not interesting to applications using netlink.
1120 inetdev_changename(dev, in_dev);
1122 #ifdef CONFIG_SYSCTL
1123 devinet_sysctl_unregister(&in_dev->cnf);
1124 neigh_sysctl_unregister(in_dev->arp_parms);
1125 neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
1126 NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1127 devinet_sysctl_register(in_dev, &in_dev->cnf);
1135 static struct notifier_block ip_netdev_notifier = {
1136 .notifier_call =inetdev_event,
1139 static inline size_t inet_nlmsg_size(void)
1141 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1142 + nla_total_size(4) /* IFA_ADDRESS */
1143 + nla_total_size(4) /* IFA_LOCAL */
1144 + nla_total_size(4) /* IFA_BROADCAST */
1145 + nla_total_size(4) /* IFA_ANYCAST */
1146 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1149 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1150 u32 pid, u32 seq, int event, unsigned int flags)
1152 struct ifaddrmsg *ifm;
1153 struct nlmsghdr *nlh;
1155 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1159 ifm = nlmsg_data(nlh);
1160 ifm->ifa_family = AF_INET;
1161 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1162 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1163 ifm->ifa_scope = ifa->ifa_scope;
1164 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1166 if (ifa->ifa_address)
1167 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1170 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1172 if (ifa->ifa_broadcast)
1173 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1175 if (ifa->ifa_anycast)
1176 NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1178 if (ifa->ifa_label[0])
1179 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1181 return nlmsg_end(skb, nlh);
1184 return nlmsg_cancel(skb, nlh);
1187 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1190 struct net_device *dev;
1191 struct in_device *in_dev;
1192 struct in_ifaddr *ifa;
1193 struct sock *sk = skb->sk;
1194 int s_ip_idx, s_idx = cb->args[0];
1196 s_ip_idx = ip_idx = cb->args[1];
1197 read_lock(&dev_base_lock);
1198 for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
1204 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
1209 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1210 ifa = ifa->ifa_next, ip_idx++) {
1211 if (sk && vx_info_flags(sk->sk_vx_info, VXF_HIDE_NETIF, 0) &&
1212 !ifa_in_nx_info(ifa, sk->sk_nx_info))
1214 if (ip_idx < s_ip_idx)
1216 if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1218 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1227 read_unlock(&dev_base_lock);
1229 cb->args[1] = ip_idx;
1234 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1237 struct sk_buff *skb;
1238 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1241 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1245 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1246 /* failure implies BUG in inet_nlmsg_size() */
1249 err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1252 rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
1255 static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
1256 [RTM_NEWADDR - RTM_BASE] = { .doit = inet_rtm_newaddr, },
1257 [RTM_DELADDR - RTM_BASE] = { .doit = inet_rtm_deladdr, },
1258 [RTM_GETADDR - RTM_BASE] = { .dumpit = inet_dump_ifaddr, },
1259 [RTM_NEWROUTE - RTM_BASE] = { .doit = inet_rtm_newroute, },
1260 [RTM_DELROUTE - RTM_BASE] = { .doit = inet_rtm_delroute, },
1261 [RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute,
1262 .dumpit = inet_dump_fib, },
1263 #ifdef CONFIG_IP_MULTIPLE_TABLES
1264 [RTM_GETRULE - RTM_BASE] = { .dumpit = fib4_rules_dump, },
1268 #ifdef CONFIG_SYSCTL
1270 void inet_forward_change(void)
1272 struct net_device *dev;
1273 int on = ipv4_devconf.forwarding;
1275 ipv4_devconf.accept_redirects = !on;
1276 ipv4_devconf_dflt.forwarding = on;
1278 read_lock(&dev_base_lock);
1279 for (dev = dev_base; dev; dev = dev->next) {
1280 struct in_device *in_dev;
1282 in_dev = __in_dev_get_rcu(dev);
1284 in_dev->cnf.forwarding = on;
1287 read_unlock(&dev_base_lock);
1292 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1293 struct file* filp, void __user *buffer,
1294 size_t *lenp, loff_t *ppos)
1296 int *valp = ctl->data;
1298 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1300 if (write && *valp != val) {
1301 if (valp == &ipv4_devconf.forwarding)
1302 inet_forward_change();
1303 else if (valp != &ipv4_devconf_dflt.forwarding)
1310 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1311 struct file* filp, void __user *buffer,
1312 size_t *lenp, loff_t *ppos)
1314 int *valp = ctl->data;
1316 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1318 if (write && *valp != val)
1324 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1325 void __user *oldval, size_t __user *oldlenp,
1326 void __user *newval, size_t newlen)
1328 int *valp = table->data;
1331 if (!newval || !newlen)
1334 if (newlen != sizeof(int))
1337 if (get_user(new, (int __user *)newval))
1343 if (oldval && oldlenp) {
1346 if (get_user(len, oldlenp))
1350 if (len > table->maxlen)
1351 len = table->maxlen;
1352 if (copy_to_user(oldval, valp, len))
1354 if (put_user(len, oldlenp))
1365 static struct devinet_sysctl_table {
1366 struct ctl_table_header *sysctl_header;
1367 ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1368 ctl_table devinet_dev[2];
1369 ctl_table devinet_conf_dir[2];
1370 ctl_table devinet_proto_dir[2];
1371 ctl_table devinet_root_dir[2];
1372 } devinet_sysctl = {
1375 .ctl_name = NET_IPV4_CONF_FORWARDING,
1376 .procname = "forwarding",
1377 .data = &ipv4_devconf.forwarding,
1378 .maxlen = sizeof(int),
1380 .proc_handler = &devinet_sysctl_forward,
1383 .ctl_name = NET_IPV4_CONF_MC_FORWARDING,
1384 .procname = "mc_forwarding",
1385 .data = &ipv4_devconf.mc_forwarding,
1386 .maxlen = sizeof(int),
1388 .proc_handler = &proc_dointvec,
1391 .ctl_name = NET_IPV4_CONF_ACCEPT_REDIRECTS,
1392 .procname = "accept_redirects",
1393 .data = &ipv4_devconf.accept_redirects,
1394 .maxlen = sizeof(int),
1396 .proc_handler = &proc_dointvec,
1399 .ctl_name = NET_IPV4_CONF_SECURE_REDIRECTS,
1400 .procname = "secure_redirects",
1401 .data = &ipv4_devconf.secure_redirects,
1402 .maxlen = sizeof(int),
1404 .proc_handler = &proc_dointvec,
1407 .ctl_name = NET_IPV4_CONF_SHARED_MEDIA,
1408 .procname = "shared_media",
1409 .data = &ipv4_devconf.shared_media,
1410 .maxlen = sizeof(int),
1412 .proc_handler = &proc_dointvec,
1415 .ctl_name = NET_IPV4_CONF_RP_FILTER,
1416 .procname = "rp_filter",
1417 .data = &ipv4_devconf.rp_filter,
1418 .maxlen = sizeof(int),
1420 .proc_handler = &proc_dointvec,
1423 .ctl_name = NET_IPV4_CONF_SEND_REDIRECTS,
1424 .procname = "send_redirects",
1425 .data = &ipv4_devconf.send_redirects,
1426 .maxlen = sizeof(int),
1428 .proc_handler = &proc_dointvec,
1431 .ctl_name = NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE,
1432 .procname = "accept_source_route",
1433 .data = &ipv4_devconf.accept_source_route,
1434 .maxlen = sizeof(int),
1436 .proc_handler = &proc_dointvec,
1439 .ctl_name = NET_IPV4_CONF_PROXY_ARP,
1440 .procname = "proxy_arp",
1441 .data = &ipv4_devconf.proxy_arp,
1442 .maxlen = sizeof(int),
1444 .proc_handler = &proc_dointvec,
1447 .ctl_name = NET_IPV4_CONF_MEDIUM_ID,
1448 .procname = "medium_id",
1449 .data = &ipv4_devconf.medium_id,
1450 .maxlen = sizeof(int),
1452 .proc_handler = &proc_dointvec,
1455 .ctl_name = NET_IPV4_CONF_BOOTP_RELAY,
1456 .procname = "bootp_relay",
1457 .data = &ipv4_devconf.bootp_relay,
1458 .maxlen = sizeof(int),
1460 .proc_handler = &proc_dointvec,
1463 .ctl_name = NET_IPV4_CONF_LOG_MARTIANS,
1464 .procname = "log_martians",
1465 .data = &ipv4_devconf.log_martians,
1466 .maxlen = sizeof(int),
1468 .proc_handler = &proc_dointvec,
1471 .ctl_name = NET_IPV4_CONF_TAG,
1473 .data = &ipv4_devconf.tag,
1474 .maxlen = sizeof(int),
1476 .proc_handler = &proc_dointvec,
1479 .ctl_name = NET_IPV4_CONF_ARPFILTER,
1480 .procname = "arp_filter",
1481 .data = &ipv4_devconf.arp_filter,
1482 .maxlen = sizeof(int),
1484 .proc_handler = &proc_dointvec,
1487 .ctl_name = NET_IPV4_CONF_ARP_ANNOUNCE,
1488 .procname = "arp_announce",
1489 .data = &ipv4_devconf.arp_announce,
1490 .maxlen = sizeof(int),
1492 .proc_handler = &proc_dointvec,
1495 .ctl_name = NET_IPV4_CONF_ARP_IGNORE,
1496 .procname = "arp_ignore",
1497 .data = &ipv4_devconf.arp_ignore,
1498 .maxlen = sizeof(int),
1500 .proc_handler = &proc_dointvec,
1503 .ctl_name = NET_IPV4_CONF_ARP_ACCEPT,
1504 .procname = "arp_accept",
1505 .data = &ipv4_devconf.arp_accept,
1506 .maxlen = sizeof(int),
1508 .proc_handler = &proc_dointvec,
1511 .ctl_name = NET_IPV4_CONF_NOXFRM,
1512 .procname = "disable_xfrm",
1513 .data = &ipv4_devconf.no_xfrm,
1514 .maxlen = sizeof(int),
1516 .proc_handler = &ipv4_doint_and_flush,
1517 .strategy = &ipv4_doint_and_flush_strategy,
1520 .ctl_name = NET_IPV4_CONF_NOPOLICY,
1521 .procname = "disable_policy",
1522 .data = &ipv4_devconf.no_policy,
1523 .maxlen = sizeof(int),
1525 .proc_handler = &ipv4_doint_and_flush,
1526 .strategy = &ipv4_doint_and_flush_strategy,
1529 .ctl_name = NET_IPV4_CONF_FORCE_IGMP_VERSION,
1530 .procname = "force_igmp_version",
1531 .data = &ipv4_devconf.force_igmp_version,
1532 .maxlen = sizeof(int),
1534 .proc_handler = &ipv4_doint_and_flush,
1535 .strategy = &ipv4_doint_and_flush_strategy,
1538 .ctl_name = NET_IPV4_CONF_PROMOTE_SECONDARIES,
1539 .procname = "promote_secondaries",
1540 .data = &ipv4_devconf.promote_secondaries,
1541 .maxlen = sizeof(int),
1543 .proc_handler = &ipv4_doint_and_flush,
1544 .strategy = &ipv4_doint_and_flush_strategy,
1549 .ctl_name = NET_PROTO_CONF_ALL,
1552 .child = devinet_sysctl.devinet_vars,
1555 .devinet_conf_dir = {
1557 .ctl_name = NET_IPV4_CONF,
1560 .child = devinet_sysctl.devinet_dev,
1563 .devinet_proto_dir = {
1565 .ctl_name = NET_IPV4,
1568 .child = devinet_sysctl.devinet_conf_dir,
1571 .devinet_root_dir = {
1573 .ctl_name = CTL_NET,
1576 .child = devinet_sysctl.devinet_proto_dir,
1581 static void devinet_sysctl_register(struct in_device *in_dev,
1582 struct ipv4_devconf *p)
1585 struct net_device *dev = in_dev ? in_dev->dev : NULL;
1586 struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t),
1588 char *dev_name = NULL;
1592 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1593 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1594 t->devinet_vars[i].de = NULL;
1598 dev_name = dev->name;
1599 t->devinet_dev[0].ctl_name = dev->ifindex;
1601 dev_name = "default";
1602 t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
1606 * Make a copy of dev_name, because '.procname' is regarded as const
1607 * by sysctl and we wouldn't want anyone to change it under our feet
1608 * (see SIOCSIFNAME).
1610 dev_name = kstrdup(dev_name, GFP_KERNEL);
1614 t->devinet_dev[0].procname = dev_name;
1615 t->devinet_dev[0].child = t->devinet_vars;
1616 t->devinet_dev[0].de = NULL;
1617 t->devinet_conf_dir[0].child = t->devinet_dev;
1618 t->devinet_conf_dir[0].de = NULL;
1619 t->devinet_proto_dir[0].child = t->devinet_conf_dir;
1620 t->devinet_proto_dir[0].de = NULL;
1621 t->devinet_root_dir[0].child = t->devinet_proto_dir;
1622 t->devinet_root_dir[0].de = NULL;
1624 t->sysctl_header = register_sysctl_table(t->devinet_root_dir, 0);
1625 if (!t->sysctl_header)
1639 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
1642 struct devinet_sysctl_table *t = p->sysctl;
1644 unregister_sysctl_table(t->sysctl_header);
1645 kfree(t->devinet_dev[0].procname);
1651 void __init devinet_init(void)
1653 register_gifconf(PF_INET, inet_gifconf);
1654 register_netdevice_notifier(&ip_netdev_notifier);
1655 rtnetlink_links[PF_INET] = inet_rtnetlink_table;
1656 #ifdef CONFIG_SYSCTL
1657 devinet_sysctl.sysctl_header =
1658 register_sysctl_table(devinet_sysctl.devinet_root_dir, 0);
1659 devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
1663 EXPORT_SYMBOL(in_dev_finish_destroy);
1664 EXPORT_SYMBOL(inet_select_addr);
1665 EXPORT_SYMBOL(inetdev_by_index);
1666 EXPORT_SYMBOL(register_inetaddr_notifier);
1667 EXPORT_SYMBOL(unregister_inetaddr_notifier);