X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv6%2Faddrconf.c;h=5c1b33f40a5298f2a5252913804a0bc49cf6a79a;hb=16c70f8c1b54b61c3b951b6fb220df250fe09b32;hp=226aa2bb40b54a95362344b5aa96b77e90b3635e;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 226aa2bb4..5c1b33f40 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -35,9 +35,11 @@ * YOSHIFUJI Hideaki @USAGI : ARCnet support * YOSHIFUJI Hideaki @USAGI : convert /proc/net/if_inet6 to * seq_file. + * YOSHIFUJI Hideaki @USAGI : improved source address + * selection; consider scope, + * status etc. */ -#include #include #include #include @@ -48,14 +50,17 @@ #include #include #include +#include #include #include #include #ifdef CONFIG_SYSCTL #include #endif +#include #include #include +#include #include #include @@ -72,8 +77,6 @@ #ifdef CONFIG_IPV6_PRIVACY #include -#include -#include #endif #include @@ -98,17 +101,12 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf static void addrconf_sysctl_unregister(struct ipv6_devconf *p); #endif -int inet6_dev_count; -int inet6_ifa_count; - #ifdef CONFIG_IPV6_PRIVACY static int __ipv6_regen_rndid(struct inet6_dev *idev); static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); static void ipv6_regen_rndid(unsigned long data); static int desync_factor = MAX_DESYNC_FACTOR * HZ; -static struct crypto_tfm *md5_tfm; -static spinlock_t md5_tfm_lock = SPIN_LOCK_UNLOCKED; #endif static int ipv6_count_addresses(struct inet6_dev *idev); @@ -117,30 +115,34 @@ static int ipv6_count_addresses(struct inet6_dev *idev); * Configured unicast address hash table */ static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE]; -static rwlock_t addrconf_hash_lock = RW_LOCK_UNLOCKED; +static DEFINE_RWLOCK(addrconf_hash_lock); /* Protects inet6 devices */ -rwlock_t addrconf_lock = RW_LOCK_UNLOCKED; +DEFINE_RWLOCK(addrconf_lock); static void addrconf_verify(unsigned long); -static struct timer_list addr_chk_timer = - TIMER_INITIALIZER(addrconf_verify, 0, 0); -static spinlock_t addrconf_verify_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); +static DEFINE_SPINLOCK(addrconf_verify_lock); + +static void addrconf_join_anycast(struct inet6_ifaddr *ifp); +static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); static int addrconf_ifdown(struct net_device *dev, int how); -static void addrconf_dad_start(struct inet6_ifaddr *ifp, int flags); +static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags); static void addrconf_dad_timer(unsigned long data); static void addrconf_dad_completed(struct inet6_ifaddr *ifp); +static void addrconf_dad_run(struct inet6_dev *idev); static void addrconf_rs_timer(unsigned long data); +static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); static void inet6_prefix_notify(int event, struct inet6_dev *idev, struct prefix_info *pinfo); static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev); -static struct notifier_block *inet6addr_chain; +static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); struct ipv6_devconf ipv6_devconf = { .forwarding = 0, @@ -162,6 +164,15 @@ struct ipv6_devconf ipv6_devconf = { .max_desync_factor = MAX_DESYNC_FACTOR, #endif .max_addresses = IPV6_MAX_ADDRESSES, + .accept_ra_defrtr = 1, + .accept_ra_pinfo = 1, +#ifdef CONFIG_IPV6_ROUTER_PREF + .accept_ra_rtr_pref = 1, + .rtr_probe_interval = 60 * HZ, +#ifdef CONFIG_IPV6_ROUTE_INFO + .accept_ra_rt_info_max_plen = 0, +#endif +#endif }; static struct ipv6_devconf ipv6_devconf_dflt = { @@ -183,52 +194,68 @@ static struct ipv6_devconf ipv6_devconf_dflt = { .max_desync_factor = MAX_DESYNC_FACTOR, #endif .max_addresses = IPV6_MAX_ADDRESSES, + .accept_ra_defrtr = 1, + .accept_ra_pinfo = 1, +#ifdef CONFIG_IPV6_ROUTER_PREF + .accept_ra_rtr_pref = 1, + .rtr_probe_interval = 60 * HZ, +#ifdef CONFIG_IPV6_ROUTE_INFO + .accept_ra_rt_info_max_plen = 0, +#endif +#endif }; /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ +#if 0 const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; +#endif const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; -int ipv6_addr_type(const struct in6_addr *addr) +#define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16) + +static inline unsigned ipv6_addr_scope2type(unsigned scope) +{ + switch(scope) { + case IPV6_ADDR_SCOPE_NODELOCAL: + return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_NODELOCAL) | + IPV6_ADDR_LOOPBACK); + case IPV6_ADDR_SCOPE_LINKLOCAL: + return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL) | + IPV6_ADDR_LINKLOCAL); + case IPV6_ADDR_SCOPE_SITELOCAL: + return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_SITELOCAL) | + IPV6_ADDR_SITELOCAL); + } + return IPV6_ADDR_SCOPE_TYPE(scope); +} + +int __ipv6_addr_type(const struct in6_addr *addr) { - int type; u32 st; st = addr->s6_addr32[0]; - if ((st & htonl(0xFF000000)) == htonl(0xFF000000)) { - type = IPV6_ADDR_MULTICAST; - - switch((st & htonl(0x00FF0000))) { - case __constant_htonl(0x00010000): - type |= IPV6_ADDR_LOOPBACK; - break; - - case __constant_htonl(0x00020000): - type |= IPV6_ADDR_LINKLOCAL; - break; - - case __constant_htonl(0x00050000): - type |= IPV6_ADDR_SITELOCAL; - break; - }; - return type; - } - - type = IPV6_ADDR_UNICAST; - /* Consider all addresses with the first three bits different of - 000 and 111 as finished. + 000 and 111 as unicasts. */ if ((st & htonl(0xE0000000)) != htonl(0x00000000) && (st & htonl(0xE0000000)) != htonl(0xE0000000)) - return type; - - if ((st & htonl(0xFFC00000)) == htonl(0xFE800000)) - return (IPV6_ADDR_LINKLOCAL | type); + return (IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); + if ((st & htonl(0xFF000000)) == htonl(0xFF000000)) { + /* multicast */ + /* addr-select 3.1 */ + return (IPV6_ADDR_MULTICAST | + ipv6_addr_scope2type(IPV6_ADDR_MC_SCOPE(addr))); + } + + if ((st & htonl(0xFFC00000)) == htonl(0xFE800000)) + return (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL)); /* addr-select 3.1 */ if ((st & htonl(0xFFC00000)) == htonl(0xFEC00000)) - return (IPV6_ADDR_SITELOCAL | type); + return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_SITELOCAL)); /* addr-select 3.1 */ if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) { if (addr->s6_addr32[2] == 0) { @@ -236,24 +263,20 @@ int ipv6_addr_type(const struct in6_addr *addr) return IPV6_ADDR_ANY; if (addr->s6_addr32[3] == htonl(0x00000001)) - return (IPV6_ADDR_LOOPBACK | type); + return (IPV6_ADDR_LOOPBACK | IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL)); /* addr-select 3.4 */ - return (IPV6_ADDR_COMPATv4 | type); + return (IPV6_ADDR_COMPATv4 | IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.3 */ } if (addr->s6_addr32[2] == htonl(0x0000ffff)) - return IPV6_ADDR_MAPPED; + return (IPV6_ADDR_MAPPED | + IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.3 */ } - st &= htonl(0xFF000000); - if (st == 0) - return IPV6_ADDR_RESERVED; - st &= htonl(0xFE000000); - if (st == htonl(0x02000000)) - return IPV6_ADDR_RESERVED; /* for NSAP */ - if (st == htonl(0x04000000)) - return IPV6_ADDR_RESERVED; /* for IPX */ - return type; + return (IPV6_ADDR_RESERVED | + IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.4 */ } static void addrconf_del_timer(struct inet6_ifaddr *ifp) @@ -304,8 +327,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) printk("Freeing alive inet6 device %p\n", idev); return; } - snmp6_unregister_dev(idev); - inet6_dev_count--; + snmp6_free_dev(idev); kfree(idev); } @@ -318,71 +340,83 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) if (dev->mtu < IPV6_MIN_MTU) return NULL; - ndev = kmalloc(sizeof(struct inet6_dev), GFP_KERNEL); + ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL); - if (ndev) { - memset(ndev, 0, sizeof(struct inet6_dev)); + if (ndev == NULL) + return NULL; - ndev->lock = RW_LOCK_UNLOCKED; - ndev->dev = dev; - memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf)); - ndev->cnf.mtu6 = dev->mtu; - ndev->cnf.sysctl = NULL; - ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); - if (ndev->nd_parms == NULL) { - kfree(ndev); - return NULL; - } - inet6_dev_count++; - /* We refer to the device */ - dev_hold(dev); - - if (snmp6_register_dev(ndev) < 0) { - ADBG((KERN_WARNING - "%s(): cannot create /proc/net/dev_snmp6/%s\n", - __FUNCTION__, dev->name)); - neigh_parms_release(&nd_tbl, ndev->nd_parms); - ndev->dead = 1; - in6_dev_finish_destroy(ndev); - return NULL; - } + rwlock_init(&ndev->lock); + ndev->dev = dev; + memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf)); + ndev->cnf.mtu6 = dev->mtu; + ndev->cnf.sysctl = NULL; + ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); + if (ndev->nd_parms == NULL) { + kfree(ndev); + return NULL; + } + /* We refer to the device */ + dev_hold(dev); + + if (snmp6_alloc_dev(ndev) < 0) { + ADBG((KERN_WARNING + "%s(): cannot allocate memory for statistics; dev=%s.\n", + __FUNCTION__, dev->name)); + neigh_parms_release(&nd_tbl, ndev->nd_parms); + ndev->dead = 1; + in6_dev_finish_destroy(ndev); + return NULL; + } - /* One reference from device. We must do this before - * we invoke __ipv6_regen_rndid(). - */ - in6_dev_hold(ndev); + if (snmp6_register_dev(ndev) < 0) { + ADBG((KERN_WARNING + "%s(): cannot create /proc/net/dev_snmp6/%s\n", + __FUNCTION__, dev->name)); + neigh_parms_release(&nd_tbl, ndev->nd_parms); + ndev->dead = 1; + in6_dev_finish_destroy(ndev); + return NULL; + } + + /* One reference from device. We must do this before + * we invoke __ipv6_regen_rndid(). + */ + in6_dev_hold(ndev); #ifdef CONFIG_IPV6_PRIVACY - get_random_bytes(ndev->rndid, sizeof(ndev->rndid)); - get_random_bytes(ndev->entropy, sizeof(ndev->entropy)); - init_timer(&ndev->regen_timer); - ndev->regen_timer.function = ipv6_regen_rndid; - ndev->regen_timer.data = (unsigned long) ndev; - if ((dev->flags&IFF_LOOPBACK) || - dev->type == ARPHRD_TUNNEL || - dev->type == ARPHRD_SIT) { - printk(KERN_INFO - "Disabled Privacy Extensions on device %p(%s)\n", - dev, dev->name); - ndev->cnf.use_tempaddr = -1; - } else { - in6_dev_hold(ndev); - ipv6_regen_rndid((unsigned long) ndev); - } + init_timer(&ndev->regen_timer); + ndev->regen_timer.function = ipv6_regen_rndid; + ndev->regen_timer.data = (unsigned long) ndev; + if ((dev->flags&IFF_LOOPBACK) || + dev->type == ARPHRD_TUNNEL || + dev->type == ARPHRD_NONE || + dev->type == ARPHRD_SIT) { + printk(KERN_INFO + "%s: Disabled Privacy Extensions\n", + dev->name); + ndev->cnf.use_tempaddr = -1; + } else { + in6_dev_hold(ndev); + ipv6_regen_rndid((unsigned long) ndev); + } #endif - write_lock_bh(&addrconf_lock); - dev->ip6_ptr = ndev; - write_unlock_bh(&addrconf_lock); + if (netif_carrier_ok(dev)) + ndev->if_flags |= IF_READY; - ipv6_mc_init_dev(ndev); - ndev->tstamp = jiffies; + write_lock_bh(&addrconf_lock); + dev->ip6_ptr = ndev; + write_unlock_bh(&addrconf_lock); + + ipv6_mc_init_dev(ndev); + ndev->tstamp = jiffies; #ifdef CONFIG_SYSCTL - neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6, - NET_IPV6_NEIGH, "ipv6", &ndisc_ifinfo_sysctl_change); - addrconf_sysctl_register(ndev, &ndev->cnf); + neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6, + NET_IPV6_NEIGH, "ipv6", + &ndisc_ifinfo_sysctl_change, + NULL); + addrconf_sysctl_register(ndev, &ndev->cnf); #endif - } return ndev; } @@ -396,11 +430,13 @@ static struct inet6_dev * ipv6_find_idev(struct net_device *dev) if ((idev = ipv6_add_dev(dev)) == NULL) return NULL; } + if (dev->flags&IFF_UP) ipv6_mc_up(idev); return idev; } +#ifdef CONFIG_SYSCTL static void dev_forward_change(struct inet6_dev *idev) { struct net_device *dev; @@ -419,39 +455,34 @@ static void dev_forward_change(struct inet6_dev *idev) ipv6_dev_mc_dec(dev, &addr); } for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) { - ipv6_addr_prefix(&addr, &ifa->addr, ifa->prefix_len); - if (ipv6_addr_any(&addr)) - continue; if (idev->cnf.forwarding) - ipv6_dev_ac_inc(idev->dev, &addr); + addrconf_join_anycast(ifa); else - ipv6_dev_ac_dec(idev->dev, &addr); + addrconf_leave_anycast(ifa); } } -static void addrconf_forward_change(struct inet6_dev *idev) +static void addrconf_forward_change(void) { struct net_device *dev; - - if (idev) { - dev_forward_change(idev); - return; - } + struct inet6_dev *idev; read_lock(&dev_base_lock); for (dev=dev_base; dev; dev=dev->next) { read_lock(&addrconf_lock); idev = __in6_dev_get(dev); if (idev) { + int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding); idev->cnf.forwarding = ipv6_devconf.forwarding; - dev_forward_change(idev); + if (changed) + dev_forward_change(idev); } read_unlock(&addrconf_lock); } read_unlock(&dev_base_lock); } - +#endif /* Nobody refers to this ifaddr, destroy it */ @@ -472,38 +503,71 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) printk("Freeing alive inet6 address %p\n", ifp); return; } - inet6_ifa_count--; + dst_release(&ifp->rt->u.dst); + kfree(ifp); } +static void +ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) +{ + struct inet6_ifaddr *ifa, **ifap; + int ifp_scope = ipv6_addr_src_scope(&ifp->addr); + + /* + * Each device address list is sorted in order of scope - + * global before linklocal. + */ + for (ifap = &idev->addr_list; (ifa = *ifap) != NULL; + ifap = &ifa->if_next) { + if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr)) + break; + } + + ifp->if_next = *ifap; + *ifap = ifp; +} + /* On success it returns ifp with increased reference count */ static struct inet6_ifaddr * ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, - int scope, unsigned flags) + int scope, u32 flags) { - struct inet6_ifaddr *ifa; + struct inet6_ifaddr *ifa = NULL; + struct rt6_info *rt; int hash; - static spinlock_t lock = SPIN_LOCK_UNLOCKED; + int err = 0; + + read_lock_bh(&addrconf_lock); + if (idev->dead) { + err = -ENODEV; /*XXX*/ + goto out2; + } - spin_lock_bh(&lock); + write_lock(&addrconf_hash_lock); /* Ignore adding duplicate addresses on an interface */ if (ipv6_chk_same_addr(addr, idev->dev)) { - spin_unlock_bh(&lock); ADBG(("ipv6_add_addr: already assigned\n")); - return ERR_PTR(-EEXIST); + err = -EEXIST; + goto out; } - ifa = kmalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC); + ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC); if (ifa == NULL) { - spin_unlock_bh(&lock); ADBG(("ipv6_add_addr: malloc failed\n")); - return ERR_PTR(-ENOBUFS); + err = -ENOBUFS; + goto out; + } + + rt = addrconf_dst_alloc(idev, addr, 0); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto out; } - memset(ifa, 0, sizeof(struct inet6_ifaddr)); ipv6_addr_copy(&ifa->addr, addr); spin_lock_init(&ifa->lock); @@ -514,15 +578,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, ifa->flags = flags | IFA_F_TENTATIVE; ifa->cstamp = ifa->tstamp = jiffies; - read_lock(&addrconf_lock); - if (idev->dead) { - read_unlock(&addrconf_lock); - spin_unlock_bh(&lock); - kfree(ifa); - return ERR_PTR(-ENODEV); /*XXX*/ - } + ifa->rt = rt; - inet6_ifa_count++; ifa->idev = idev; in6_dev_hold(idev); /* For caller */ @@ -531,36 +588,39 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, /* Add to big hash table */ hash = ipv6_addr_hash(addr); - write_lock_bh(&addrconf_hash_lock); ifa->lst_next = inet6_addr_lst[hash]; inet6_addr_lst[hash] = ifa; in6_ifa_hold(ifa); - write_unlock_bh(&addrconf_hash_lock); + write_unlock(&addrconf_hash_lock); - write_lock_bh(&idev->lock); + write_lock(&idev->lock); /* Add to inet6_dev unicast addr list. */ - ifa->if_next = idev->addr_list; - idev->addr_list = ifa; + ipv6_link_dev_addr(idev, ifa); #ifdef CONFIG_IPV6_PRIVACY - ifa->regen_count = 0; if (ifa->flags&IFA_F_TEMPORARY) { ifa->tmp_next = idev->tempaddr_list; idev->tempaddr_list = ifa; in6_ifa_hold(ifa); - } else { - ifa->tmp_next = NULL; } #endif in6_ifa_hold(ifa); - write_unlock_bh(&idev->lock); - read_unlock(&addrconf_lock); - spin_unlock_bh(&lock); + write_unlock(&idev->lock); +out2: + read_unlock_bh(&addrconf_lock); - notifier_call_chain(&inet6addr_chain,NETDEV_UP,ifa); + if (likely(err == 0)) + atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa); + else { + kfree(ifa); + ifa = ERR_PTR(err); + } return ifa; +out: + write_unlock(&addrconf_hash_lock); + goto out2; } /* This function wants to get referenced ifp and releases it before return */ @@ -570,6 +630,8 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) struct inet6_ifaddr *ifa, **ifap; struct inet6_dev *idev = ifp->idev; int hash; + int deleted = 0, onlink = 0; + unsigned long expires = jiffies; hash = ipv6_addr_hash(&ifp->addr); @@ -606,36 +668,96 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) } #endif - for (ifap = &idev->addr_list; (ifa=*ifap) != NULL; - ifap = &ifa->if_next) { + for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;) { if (ifa == ifp) { *ifap = ifa->if_next; __in6_ifa_put(ifp); ifa->if_next = NULL; - break; + if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0) + break; + deleted = 1; + continue; + } else if (ifp->flags & IFA_F_PERMANENT) { + if (ipv6_prefix_equal(&ifa->addr, &ifp->addr, + ifp->prefix_len)) { + if (ifa->flags & IFA_F_PERMANENT) { + onlink = 1; + if (deleted) + break; + } else { + unsigned long lifetime; + + if (!onlink) + onlink = -1; + + spin_lock(&ifa->lock); + lifetime = min_t(unsigned long, + ifa->valid_lft, 0x7fffffffUL/HZ); + if (time_before(expires, + ifa->tstamp + lifetime * HZ)) + expires = ifa->tstamp + lifetime * HZ; + spin_unlock(&ifa->lock); + } + } } + ifap = &ifa->if_next; } write_unlock_bh(&idev->lock); ipv6_ifa_notify(RTM_DELADDR, ifp); - notifier_call_chain(&inet6addr_chain,NETDEV_DOWN,ifp); + atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp); addrconf_del_timer(ifp); + /* + * Purge or update corresponding prefix + * + * 1) we don't purge prefix here if address was not permanent. + * prefix is managed by its own lifetime. + * 2) if there're no addresses, delete prefix. + * 3) if there're still other permanent address(es), + * corresponding prefix is still permanent. + * 4) otherwise, update prefix lifetime to the + * longest valid lifetime among the corresponding + * addresses on the device. + * Note: subsequent RA will update lifetime. + * + * --yoshfuji + */ + if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) { + struct in6_addr prefix; + struct rt6_info *rt; + + ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); + rt = rt6_lookup(&prefix, NULL, ifp->idev->dev->ifindex, 1); + + if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { + if (onlink == 0) { + ip6_del_rt(rt, NULL, NULL, NULL); + rt = NULL; + } else if (!(rt->rt6i_flags & RTF_EXPIRES)) { + rt->rt6i_expires = expires; + rt->rt6i_flags |= RTF_EXPIRES; + } + } + dst_release(&rt->u.dst); + } + in6_ifa_put(ifp); } #ifdef CONFIG_IPV6_PRIVACY static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift) { - struct inet6_dev *idev; + struct inet6_dev *idev = ifp->idev; struct in6_addr addr, *tmpaddr; - unsigned long tmp_prefered_lft, tmp_valid_lft; + unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp; int tmp_plen; int ret = 0; int max_addresses; + write_lock(&idev->lock); if (ift) { spin_lock_bh(&ift->lock); memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8); @@ -645,40 +767,35 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i tmpaddr = NULL; } retry: - spin_lock_bh(&ifp->lock); - in6_ifa_hold(ifp); - idev = ifp->idev; in6_dev_hold(idev); - memcpy(addr.s6_addr, ifp->addr.s6_addr, 8); - write_lock(&idev->lock); if (idev->cnf.use_tempaddr <= 0) { write_unlock(&idev->lock); - spin_unlock_bh(&ifp->lock); printk(KERN_INFO "ipv6_create_tempaddr(): use_tempaddr is disabled.\n"); in6_dev_put(idev); - in6_ifa_put(ifp); ret = -1; goto out; } + spin_lock_bh(&ifp->lock); if (ifp->regen_count++ >= idev->cnf.regen_max_retry) { idev->cnf.use_tempaddr = -1; /*XXX*/ - write_unlock(&idev->lock); spin_unlock_bh(&ifp->lock); + write_unlock(&idev->lock); printk(KERN_WARNING "ipv6_create_tempaddr(): regeneration time exceeded. disabled temporary address support.\n"); in6_dev_put(idev); - in6_ifa_put(ifp); ret = -1; goto out; } + in6_ifa_hold(ifp); + memcpy(addr.s6_addr, ifp->addr.s6_addr, 8); if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) { - write_unlock(&idev->lock); spin_unlock_bh(&ifp->lock); + write_unlock(&idev->lock); printk(KERN_WARNING "ipv6_create_tempaddr(): regeneration of randomized interface id failed.\n"); - in6_dev_put(idev); in6_ifa_put(ifp); + in6_dev_put(idev); ret = -1; goto out; } @@ -691,27 +808,33 @@ retry: idev->cnf.temp_prefered_lft - desync_factor / HZ); tmp_plen = ifp->prefix_len; max_addresses = idev->cnf.max_addresses; - write_unlock(&idev->lock); + tmp_cstamp = ifp->cstamp; + tmp_tstamp = ifp->tstamp; spin_unlock_bh(&ifp->lock); + + write_unlock(&idev->lock); ift = !max_addresses || ipv6_count_addresses(idev) < max_addresses ? ipv6_add_addr(idev, &addr, tmp_plen, - ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, IFA_F_TEMPORARY) : 0; + ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, IFA_F_TEMPORARY) : NULL; if (!ift || IS_ERR(ift)) { - in6_dev_put(idev); in6_ifa_put(ifp); + in6_dev_put(idev); printk(KERN_INFO "ipv6_create_tempaddr(): retry temporary address regeneration.\n"); tmpaddr = &addr; + write_lock(&idev->lock); goto retry; } + spin_lock_bh(&ift->lock); ift->ifpub = ifp; ift->valid_lft = tmp_valid_lft; ift->prefered_lft = tmp_prefered_lft; - ift->cstamp = ifp->cstamp; - ift->tstamp = ifp->tstamp; + ift->cstamp = tmp_cstamp; + ift->tstamp = tmp_tstamp; spin_unlock_bh(&ift->lock); + addrconf_dad_start(ift, 0); in6_ifa_put(ift); in6_dev_put(idev); @@ -721,158 +844,301 @@ out: #endif /* - * Choose an appropriate source address - * should do: - * i) get an address with an appropriate scope - * ii) see if there is a specific route for the destination and use - * an address of the attached interface - * iii) don't use deprecated addresses + * Choose an appropriate source address (RFC3484) */ -static int inline ipv6_saddr_pref(const struct inet6_ifaddr *ifp, u8 invpref) +struct ipv6_saddr_score { + int addr_type; + unsigned int attrs; + int matchlen; + int scope; + unsigned int rule; +}; + +#define IPV6_SADDR_SCORE_LOCAL 0x0001 +#define IPV6_SADDR_SCORE_PREFERRED 0x0004 +#define IPV6_SADDR_SCORE_HOA 0x0008 +#define IPV6_SADDR_SCORE_OIF 0x0010 +#define IPV6_SADDR_SCORE_LABEL 0x0020 +#define IPV6_SADDR_SCORE_PRIVACY 0x0040 + +static int inline ipv6_saddr_preferred(int type) { - int pref; - pref = ifp->flags&IFA_F_DEPRECATED ? 0 : 2; -#ifdef CONFIG_IPV6_PRIVACY - pref |= (ifp->flags^invpref)&IFA_F_TEMPORARY ? 0 : 1; -#endif - return pref; + if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4| + IPV6_ADDR_LOOPBACK|IPV6_ADDR_RESERVED)) + return 1; + return 0; } -#ifdef CONFIG_IPV6_PRIVACY -#define IPV6_GET_SADDR_MAXSCORE(score) ((score) == 3) -#else -#define IPV6_GET_SADDR_MAXSCORE(score) (score) -#endif +/* static matching label */ +static int inline ipv6_saddr_label(const struct in6_addr *addr, int type) +{ + /* + * prefix (longest match) label + * ----------------------------- + * ::1/128 0 + * ::/0 1 + * 2002::/16 2 + * ::/96 3 + * ::ffff:0:0/96 4 + * fc00::/7 5 + * 2001::/32 6 + */ + if (type & IPV6_ADDR_LOOPBACK) + return 0; + else if (type & IPV6_ADDR_COMPATv4) + return 3; + else if (type & IPV6_ADDR_MAPPED) + return 4; + else if (addr->s6_addr32[0] == htonl(0x20010000)) + return 6; + else if (addr->s6_addr16[0] == htons(0x2002)) + return 2; + else if ((addr->s6_addr[0] & 0xfe) == 0xfc) + return 5; + return 1; +} -int ipv6_dev_get_saddr(struct net_device *dev, - struct in6_addr *daddr, struct in6_addr *saddr, int onlink) +int ipv6_dev_get_saddr(struct net_device *daddr_dev, + struct in6_addr *daddr, struct in6_addr *saddr) { - struct inet6_ifaddr *ifp = NULL; - struct inet6_ifaddr *match = NULL; - struct inet6_dev *idev; - int scope; - int err; - int hiscore = -1, score; + struct ipv6_saddr_score hiscore; + struct inet6_ifaddr *ifa_result = NULL; + int daddr_type = __ipv6_addr_type(daddr); + int daddr_scope = __ipv6_addr_src_scope(daddr_type); + u32 daddr_label = ipv6_saddr_label(daddr, daddr_type); + struct net_device *dev; - if (!onlink) - scope = ipv6_addr_scope(daddr); - else - scope = IFA_LINK; + memset(&hiscore, 0, sizeof(hiscore)); - /* - * known dev - * search dev and walk through dev addresses - */ + read_lock(&dev_base_lock); + read_lock(&addrconf_lock); - if (dev) { - if (dev->flags & IFF_LOOPBACK) - scope = IFA_HOST; + for (dev = dev_base; dev; dev=dev->next) { + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + + /* Rule 0: Candidate Source Address (section 4) + * - multicast and link-local destination address, + * the set of candidate source address MUST only + * include addresses assigned to interfaces + * belonging to the same link as the outgoing + * interface. + * (- For site-local destination addresses, the + * set of candidate source addresses MUST only + * include addresses assigned to interfaces + * belonging to the same site as the outgoing + * interface.) + */ + if ((daddr_type & IPV6_ADDR_MULTICAST || + daddr_scope <= IPV6_ADDR_SCOPE_LINKLOCAL) && + daddr_dev && dev != daddr_dev) + continue; - read_lock(&addrconf_lock); idev = __in6_dev_get(dev); - if (idev) { - read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->scope == scope) { - if (ifp->flags&IFA_F_TENTATIVE) - continue; -#ifdef CONFIG_IPV6_PRIVACY - score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0); -#else - score = ipv6_saddr_pref(ifp, 0); -#endif - if (score <= hiscore) - continue; + if (!idev) + continue; - if (match) - in6_ifa_put(match); - match = ifp; - hiscore = score; - in6_ifa_hold(ifp); + read_lock_bh(&idev->lock); + for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) { + struct ipv6_saddr_score score; + + score.addr_type = __ipv6_addr_type(&ifa->addr); + + /* Rule 0: + * - Tentative Address (RFC2462 section 5.4) + * - A tentative address is not considered + * "assigned to an interface" in the traditional + * sense. + * - Candidate Source Address (section 4) + * - In any case, anycast addresses, multicast + * addresses, and the unspecified address MUST + * NOT be included in a candidate set. + */ + if (ifa->flags & IFA_F_TENTATIVE) + continue; + if (unlikely(score.addr_type == IPV6_ADDR_ANY || + score.addr_type & IPV6_ADDR_MULTICAST)) { + LIMIT_NETDEBUG(KERN_DEBUG + "ADDRCONF: unspecified / multicast address" + "assigned as unicast address on %s", + dev->name); + continue; + } - if (IPV6_GET_SADDR_MAXSCORE(score)) { - read_unlock_bh(&idev->lock); - read_unlock(&addrconf_lock); - goto out; - } + score.attrs = 0; + score.matchlen = 0; + score.scope = 0; + score.rule = 0; + + if (ifa_result == NULL) { + /* record it if the first available entry */ + goto record_it; + } + + /* Rule 1: Prefer same address */ + if (hiscore.rule < 1) { + if (ipv6_addr_equal(&ifa_result->addr, daddr)) + hiscore.attrs |= IPV6_SADDR_SCORE_LOCAL; + hiscore.rule++; + } + if (ipv6_addr_equal(&ifa->addr, daddr)) { + score.attrs |= IPV6_SADDR_SCORE_LOCAL; + if (!(hiscore.attrs & IPV6_SADDR_SCORE_LOCAL)) { + score.rule = 1; + goto record_it; } + } else { + if (hiscore.attrs & IPV6_SADDR_SCORE_LOCAL) + continue; } - read_unlock_bh(&idev->lock); - } - read_unlock(&addrconf_lock); - } - if (scope == IFA_LINK) - goto out; + /* Rule 2: Prefer appropriate scope */ + if (hiscore.rule < 2) { + hiscore.scope = __ipv6_addr_src_scope(hiscore.addr_type); + hiscore.rule++; + } + score.scope = __ipv6_addr_src_scope(score.addr_type); + if (hiscore.scope < score.scope) { + if (hiscore.scope < daddr_scope) { + score.rule = 2; + goto record_it; + } else + continue; + } else if (score.scope < hiscore.scope) { + if (score.scope < daddr_scope) + break; /* addresses sorted by scope */ + else { + score.rule = 2; + goto record_it; + } + } - /* - * dev == NULL or search failed for specified dev - */ + /* Rule 3: Avoid deprecated address */ + if (hiscore.rule < 3) { + if (ipv6_saddr_preferred(hiscore.addr_type) || + !(ifa_result->flags & IFA_F_DEPRECATED)) + hiscore.attrs |= IPV6_SADDR_SCORE_PREFERRED; + hiscore.rule++; + } + if (ipv6_saddr_preferred(score.addr_type) || + !(ifa->flags & IFA_F_DEPRECATED)) { + score.attrs |= IPV6_SADDR_SCORE_PREFERRED; + if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) { + score.rule = 3; + goto record_it; + } + } else { + if (hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED) + continue; + } - read_lock(&dev_base_lock); - read_lock(&addrconf_lock); - for (dev = dev_base; dev; dev=dev->next) { - idev = __in6_dev_get(dev); - if (idev) { - read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->scope == scope) { - if (ifp->flags&IFA_F_TENTATIVE) - continue; -#ifdef CONFIG_IPV6_PRIVACY - score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0); -#else - score = ipv6_saddr_pref(ifp, 0); -#endif - if (score <= hiscore) - continue; + /* Rule 4: Prefer home address -- not implemented yet */ + if (hiscore.rule < 4) + hiscore.rule++; - if (match) - in6_ifa_put(match); - match = ifp; - hiscore = score; - in6_ifa_hold(ifp); + /* Rule 5: Prefer outgoing interface */ + if (hiscore.rule < 5) { + if (daddr_dev == NULL || + daddr_dev == ifa_result->idev->dev) + hiscore.attrs |= IPV6_SADDR_SCORE_OIF; + hiscore.rule++; + } + if (daddr_dev == NULL || + daddr_dev == ifa->idev->dev) { + score.attrs |= IPV6_SADDR_SCORE_OIF; + if (!(hiscore.attrs & IPV6_SADDR_SCORE_OIF)) { + score.rule = 5; + goto record_it; + } + } else { + if (hiscore.attrs & IPV6_SADDR_SCORE_OIF) + continue; + } - if (IPV6_GET_SADDR_MAXSCORE(score)) { - read_unlock_bh(&idev->lock); - goto out_unlock_base; - } + /* Rule 6: Prefer matching label */ + if (hiscore.rule < 6) { + if (ipv6_saddr_label(&ifa_result->addr, hiscore.addr_type) == daddr_label) + hiscore.attrs |= IPV6_SADDR_SCORE_LABEL; + hiscore.rule++; + } + if (ipv6_saddr_label(&ifa->addr, score.addr_type) == daddr_label) { + score.attrs |= IPV6_SADDR_SCORE_LABEL; + if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) { + score.rule = 6; + goto record_it; } + } else { + if (hiscore.attrs & IPV6_SADDR_SCORE_LABEL) + continue; } - read_unlock_bh(&idev->lock); + +#ifdef CONFIG_IPV6_PRIVACY + /* Rule 7: Prefer public address + * Note: prefer temprary address if use_tempaddr >= 2 + */ + if (hiscore.rule < 7) { + if ((!(ifa_result->flags & IFA_F_TEMPORARY)) ^ + (ifa_result->idev->cnf.use_tempaddr >= 2)) + hiscore.attrs |= IPV6_SADDR_SCORE_PRIVACY; + hiscore.rule++; + } + if ((!(ifa->flags & IFA_F_TEMPORARY)) ^ + (ifa->idev->cnf.use_tempaddr >= 2)) { + score.attrs |= IPV6_SADDR_SCORE_PRIVACY; + if (!(hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY)) { + score.rule = 7; + goto record_it; + } + } else { + if (hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY) + continue; + } +#else + if (hiscore.rule < 7) + hiscore.rule++; +#endif + /* Rule 8: Use longest matching prefix */ + if (hiscore.rule < 8) { + hiscore.matchlen = ipv6_addr_diff(&ifa_result->addr, daddr); + hiscore.rule++; + } + score.matchlen = ipv6_addr_diff(&ifa->addr, daddr); + if (score.matchlen > hiscore.matchlen) { + score.rule = 8; + goto record_it; + } +#if 0 + else if (score.matchlen < hiscore.matchlen) + continue; +#endif + + /* Final Rule: choose first available one */ + continue; +record_it: + if (ifa_result) + in6_ifa_put(ifa_result); + in6_ifa_hold(ifa); + ifa_result = ifa; + hiscore = score; } + read_unlock_bh(&idev->lock); } - -out_unlock_base: read_unlock(&addrconf_lock); read_unlock(&dev_base_lock); -out: - err = -EADDRNOTAVAIL; - if (match) { - ipv6_addr_copy(saddr, &match->addr); - err = 0; - in6_ifa_put(match); - } - - return err; + if (!ifa_result) + return -EADDRNOTAVAIL; + + ipv6_addr_copy(saddr, &ifa_result->addr); + in6_ifa_put(ifa_result); + return 0; } int ipv6_get_saddr(struct dst_entry *dst, struct in6_addr *daddr, struct in6_addr *saddr) { - struct rt6_info *rt; - struct net_device *dev = NULL; - int onlink; - - rt = (struct rt6_info *) dst; - if (rt) - dev = rt->rt6i_dev; - - onlink = (rt && (rt->rt6i_flags & RTF_ALLONLINK)); - - return ipv6_dev_get_saddr(dev, daddr, saddr, onlink); + return ipv6_dev_get_saddr(dst ? ((struct rt6_info *)dst)->rt6i_idev->dev : NULL, daddr, saddr); } @@ -918,7 +1184,7 @@ int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict) read_lock_bh(&addrconf_hash_lock); for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { - if (ipv6_addr_cmp(&ifp->addr, addr) == 0 && + if (ipv6_addr_equal(&ifp->addr, addr) && !(ifp->flags&IFA_F_TENTATIVE)) { if (dev == NULL || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) @@ -935,14 +1201,12 @@ int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev) struct inet6_ifaddr * ifp; u8 hash = ipv6_addr_hash(addr); - read_lock_bh(&addrconf_hash_lock); for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { - if (ipv6_addr_cmp(&ifp->addr, addr) == 0) { + if (ipv6_addr_equal(&ifp->addr, addr)) { if (dev == NULL || ifp->idev->dev == dev) break; } } - read_unlock_bh(&addrconf_hash_lock); return ifp != NULL; } @@ -953,7 +1217,7 @@ struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device * read_lock_bh(&addrconf_hash_lock); for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { - if (ipv6_addr_cmp(&ifp->addr, addr) == 0) { + if (ipv6_addr_equal(&ifp->addr, addr)) { if (dev == NULL || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { in6_ifa_hold(ifp); @@ -969,11 +1233,11 @@ struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device * int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; - const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2); + const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; - u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2); + u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); - int sk2_ipv6only = tcp_v6_ipv6only(sk2); + int sk2_ipv6only = inet_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(sk_rcv_saddr6); int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; @@ -989,7 +1253,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) return 1; if (sk2_rcv_saddr6 && - !ipv6_addr_cmp(sk_rcv_saddr6, sk2_rcv_saddr6)) + ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6)) return 1; if (addr_type == IPV6_ADDR_MAPPED && @@ -1002,10 +1266,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) /* Gets referenced address, destroys ifaddr */ -void addrconf_dad_failure(struct inet6_ifaddr *ifp) +static void addrconf_dad_stop(struct inet6_ifaddr *ifp) { - if (net_ratelimit()) - printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name); if (ifp->flags&IFA_F_PERMANENT) { spin_lock_bh(&ifp->lock); addrconf_del_timer(ifp); @@ -1031,6 +1293,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) ipv6_del_addr(ifp); } +void addrconf_dad_failure(struct inet6_ifaddr *ifp) +{ + if (net_ratelimit()) + printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name); + addrconf_dad_stop(ifp); +} /* Join to solicited addr multicast group. */ @@ -1045,17 +1313,84 @@ void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr) ipv6_dev_mc_inc(dev, &maddr); } -void addrconf_leave_solict(struct net_device *dev, struct in6_addr *addr) +void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr) { struct in6_addr maddr; - if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) + if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP)) return; addrconf_addr_solict_mult(addr, &maddr); - ipv6_dev_mc_dec(dev, &maddr); + __ipv6_dev_mc_dec(idev, &maddr); +} + +static void addrconf_join_anycast(struct inet6_ifaddr *ifp) +{ + struct in6_addr addr; + ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); + if (ipv6_addr_any(&addr)) + return; + ipv6_dev_ac_inc(ifp->idev->dev, &addr); +} + +static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) +{ + struct in6_addr addr; + ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); + if (ipv6_addr_any(&addr)) + return; + __ipv6_dev_ac_dec(ifp->idev, &addr); +} + +static int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) +{ + if (dev->addr_len != ETH_ALEN) + return -1; + memcpy(eui, dev->dev_addr, 3); + memcpy(eui + 5, dev->dev_addr + 3, 3); + + /* + * The zSeries OSA network cards can be shared among various + * OS instances, but the OSA cards have only one MAC address. + * This leads to duplicate address conflicts in conjunction + * with IPv6 if more than one instance uses the same card. + * + * The driver for these cards can deliver a unique 16-bit + * identifier for each instance sharing the same card. It is + * placed instead of 0xFFFE in the interface identifier. The + * "u" bit of the interface identifier is not inverted in this + * case. Hence the resulting interface identifier has local + * scope according to RFC2373. + */ + if (dev->dev_id) { + eui[3] = (dev->dev_id >> 8) & 0xFF; + eui[4] = dev->dev_id & 0xFF; + } else { + eui[3] = 0xFF; + eui[4] = 0xFE; + eui[0] ^= 2; + } + return 0; +} + +static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev) +{ + /* XXX: inherit EUI-64 from other interface -- yoshfuji */ + if (dev->addr_len != ARCNET_ALEN) + return -1; + memset(eui, 0, 7); + eui[7] = *(u8*)dev->dev_addr; + return 0; } +static int addrconf_ifid_infiniband(u8 *eui, struct net_device *dev) +{ + if (dev->addr_len != INFINIBAND_ALEN) + return -1; + memcpy(eui, dev->dev_addr + 12, 8); + eui[0] |= 2; + return 0; +} static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) { @@ -1063,21 +1398,11 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) case ARPHRD_ETHER: case ARPHRD_FDDI: case ARPHRD_IEEE802_TR: - if (dev->addr_len != ETH_ALEN) - return -1; - memcpy(eui, dev->dev_addr, 3); - memcpy(eui + 5, dev->dev_addr+3, 3); - eui[3] = 0xFF; - eui[4] = 0xFE; - eui[0] ^= 2; - return 0; + return addrconf_ifid_eui48(eui, dev); case ARPHRD_ARCNET: - /* XXX: inherit EUI-64 from other interface -- yoshfuji */ - if (dev->addr_len != ARCNET_ALEN) - return -1; - memset(eui, 0, 7); - eui[7] = *(u8*)dev->dev_addr; - return 0; + return addrconf_ifid_arcnet(eui, dev); + case ARPHRD_INFINIBAND: + return addrconf_ifid_infiniband(eui, dev); } return -1; } @@ -1103,38 +1428,9 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) /* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ static int __ipv6_regen_rndid(struct inet6_dev *idev) { - struct net_device *dev; - struct scatterlist sg[2]; - - sg[0].page = virt_to_page(idev->entropy); - sg[0].offset = offset_in_page(idev->entropy); - sg[0].length = 8; - sg[1].page = virt_to_page(idev->work_eui64); - sg[1].offset = offset_in_page(idev->work_eui64); - sg[1].length = 8; - - dev = idev->dev; - - if (ipv6_generate_eui64(idev->work_eui64, dev)) { - printk(KERN_INFO - "__ipv6_regen_rndid(idev=%p): cannot get EUI64 identifier; use random bytes.\n", - idev); - get_random_bytes(idev->work_eui64, sizeof(idev->work_eui64)); - } regen: - spin_lock(&md5_tfm_lock); - if (unlikely(md5_tfm == NULL)) { - spin_unlock(&md5_tfm_lock); - return -1; - } - crypto_digest_init(md5_tfm); - crypto_digest_update(md5_tfm, sg, 2); - crypto_digest_final(md5_tfm, idev->work_digest); - spin_unlock(&md5_tfm_lock); - - memcpy(idev->rndid, &idev->work_digest[0], 8); + get_random_bytes(idev->rndid, sizeof(idev->rndid)); idev->rndid[0] &= ~0x02; - memcpy(idev->entropy, &idev->work_digest[8], 8); /* * : @@ -1209,7 +1505,7 @@ static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpad static void addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, - unsigned long expires, unsigned flags) + unsigned long expires, u32 flags) { struct in6_rtmsg rtmsg; @@ -1229,7 +1525,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT)) rtmsg.rtmsg_flags |= RTF_NONEXTHOP; - ip6_route_add(&rtmsg, NULL, NULL); + ip6_route_add(&rtmsg, NULL, NULL, NULL); } /* Create "default" multicast route to the interface */ @@ -1246,7 +1542,7 @@ static void addrconf_add_mroute(struct net_device *dev) rtmsg.rtmsg_ifindex = dev->ifindex; rtmsg.rtmsg_flags = RTF_UP; rtmsg.rtmsg_type = RTMSG_NEWROUTE; - ip6_route_add(&rtmsg, NULL, NULL); + ip6_route_add(&rtmsg, NULL, NULL, NULL); } static void sit_route_add(struct net_device *dev) @@ -1263,7 +1559,7 @@ static void sit_route_add(struct net_device *dev) rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP; rtmsg.rtmsg_ifindex = dev->ifindex; - ip6_route_add(&rtmsg, NULL, NULL); + ip6_route_add(&rtmsg, NULL, NULL, NULL); } static void addrconf_add_lroute(struct net_device *dev) @@ -1345,9 +1641,17 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) not good. */ if (valid_lft >= 0x7FFFFFFF/HZ) - rt_expires = 0; + rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ); else - rt_expires = jiffies + valid_lft * HZ; + rt_expires = valid_lft * HZ; + + /* + * We convert this (in jiffies) to clock_t later. + * Avoid arithmetic overflow there as well. + * Overflow can happen only if HZ < USER_HZ. + */ + if (HZ < USER_HZ && rt_expires > 0x7FFFFFFF / USER_HZ) + rt_expires = 0x7FFFFFFF / USER_HZ; if (pinfo->onlink) { struct rt6_info *rt; @@ -1356,15 +1660,15 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { if (rt->rt6i_flags&RTF_EXPIRES) { if (valid_lft == 0) { - ip6_del_rt(rt, NULL, NULL); + ip6_del_rt(rt, NULL, NULL, NULL); rt = NULL; } else { - rt->rt6i_expires = rt_expires; + rt->rt6i_expires = jiffies + rt_expires; } } } else if (valid_lft) { addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len, - dev, rt_expires, RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT); + dev, jiffies_to_clock_t(rt_expires), RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT); } if (rt) dst_release(&rt->u.dst); @@ -1457,8 +1761,7 @@ ok: spin_unlock(&ifp->lock); if (!(flags&IFA_F_TENTATIVE)) - ipv6_ifa_notify((flags&IFA_F_DEPRECATED) ? - 0 : RTM_NEWADDR, ifp); + ipv6_ifa_notify(0, ifp); } else spin_unlock(&ifp->lock); @@ -1510,7 +1813,7 @@ ok: * Special case for SIT interfaces where we create a new "virtual" * device. */ -int addrconf_set_dstaddr(void *arg) +int addrconf_set_dstaddr(void __user *arg) { struct in6_ifreq ireq; struct net_device *dev; @@ -1544,7 +1847,7 @@ int addrconf_set_dstaddr(void *arg) p.iph.ihl = 5; p.iph.protocol = IPPROTO_IPV6; p.iph.ttl = 64; - ifr.ifr_ifru.ifru_data = (void*)&p; + ifr.ifr_ifru.ifru_data = (void __user *)&p; oldfs = get_fs(); set_fs(KERNEL_DS); err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); @@ -1566,15 +1869,21 @@ err_exit: /* * Manual configuration of address on an interface */ -static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen) +static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, + __u32 prefered_lft, __u32 valid_lft) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; + __u8 ifa_flags = 0; int scope; ASSERT_RTNL(); + /* check the lifetime */ + if (!valid_lft || prefered_lft > valid_lft) + return -EINVAL; + if ((dev = __dev_get_by_index(ifindex)) == NULL) return -ENODEV; @@ -1586,10 +1895,29 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen) scope = ipv6_addr_scope(pfx); - ifp = ipv6_add_addr(idev, pfx, plen, scope, IFA_F_PERMANENT); + if (valid_lft == INFINITY_LIFE_TIME) + ifa_flags |= IFA_F_PERMANENT; + else if (valid_lft >= 0x7FFFFFFF/HZ) + valid_lft = 0x7FFFFFFF/HZ; + + if (prefered_lft == 0) + ifa_flags |= IFA_F_DEPRECATED; + else if ((prefered_lft >= 0x7FFFFFFF/HZ) && + (prefered_lft != INFINITY_LIFE_TIME)) + prefered_lft = 0x7FFFFFFF/HZ; + + ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); + if (!IS_ERR(ifp)) { + spin_lock_bh(&ifp->lock); + ifp->valid_lft = valid_lft; + ifp->prefered_lft = prefered_lft; + ifp->tstamp = jiffies; + spin_unlock_bh(&ifp->lock); + addrconf_dad_start(ifp, 0); in6_ifa_put(ifp); + addrconf_verify(0); return 0; } @@ -1611,7 +1939,7 @@ static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen) read_lock_bh(&idev->lock); for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) { if (ifp->prefix_len == plen && - (!memcmp(pfx, &ifp->addr, sizeof(struct in6_addr)))) { + ipv6_addr_equal(pfx, &ifp->addr)) { in6_ifa_hold(ifp); read_unlock_bh(&idev->lock); @@ -1630,7 +1958,7 @@ static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen) } -int addrconf_add_ifaddr(void *arg) +int addrconf_add_ifaddr(void __user *arg) { struct in6_ifreq ireq; int err; @@ -1642,12 +1970,13 @@ int addrconf_add_ifaddr(void *arg) return -EFAULT; rtnl_lock(); - err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen); + err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen, + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); rtnl_unlock(); return err; } -int addrconf_del_ifaddr(void *arg) +int addrconf_del_ifaddr(void __user *arg) { struct in6_ifreq ireq; int err; @@ -1696,7 +2025,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) } for (dev = dev_base; dev != NULL; dev = dev->next) { - struct in_device * in_dev = __in_dev_get(dev); + struct in_device * in_dev = __in_dev_get_rtnl(dev); if (in_dev && (dev->flags & IFF_UP)) { struct in_ifaddr * ifa; @@ -1778,7 +2107,8 @@ static void addrconf_dev_config(struct net_device *dev) if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_FDDI) && (dev->type != ARPHRD_IEEE802_TR) && - (dev->type != ARPHRD_ARCNET)) { + (dev->type != ARPHRD_ARCNET) && + (dev->type != ARPHRD_INFINIBAND)) { /* Alas, we support only Ethernet autoconfiguration. */ return; } @@ -1866,7 +2196,6 @@ static void addrconf_ip6_tnl_config(struct net_device *dev) return; } ip6_tnl_add_linklocal(idev); - addrconf_add_mroute(dev); } static int addrconf_notify(struct notifier_block *this, unsigned long event, @@ -1874,9 +2203,45 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *) data; struct inet6_dev *idev = __in6_dev_get(dev); + int run_pending = 0; switch(event) { case NETDEV_UP: + case NETDEV_CHANGE: + if (event == NETDEV_UP) { + if (!netif_carrier_ok(dev)) { + /* device is not ready yet. */ + printk(KERN_INFO + "ADDRCONF(NETDEV_UP): %s: " + "link is not ready\n", + dev->name); + break; + } + + if (idev) + idev->if_flags |= IF_READY; + } else { + if (!netif_carrier_ok(dev)) { + /* device is still not ready. */ + break; + } + + if (idev) { + if (idev->if_flags & IF_READY) { + /* device is already configured. */ + break; + } + idev->if_flags |= IF_READY; + } + + printk(KERN_INFO + "ADDRCONF(NETDEV_CHANGE): %s: " + "link becomes ready\n", + dev->name); + + run_pending = 1; + } + switch(dev->type) { case ARPHRD_SIT: addrconf_sit_config(dev); @@ -1893,6 +2258,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, break; }; if (idev) { + if (run_pending) + addrconf_dad_run(idev); + /* If the MTU changed during the interface down, when the interface up, the changed MTU must be reflected in the idev as well as routers. @@ -1927,14 +2295,16 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, */ addrconf_ifdown(dev, event != NETDEV_DOWN); break; - case NETDEV_CHANGE: - break; + case NETDEV_CHANGENAME: #ifdef CONFIG_SYSCTL if (idev) { addrconf_sysctl_unregister(&idev->cnf); neigh_sysctl_unregister(idev->nd_parms); - neigh_sysctl_register(dev, idev->nd_parms, NET_IPV6, NET_IPV6_NEIGH, "ipv6", &ndisc_ifinfo_sysctl_change); + neigh_sysctl_register(dev, idev->nd_parms, + NET_IPV6, NET_IPV6_NEIGH, "ipv6", + &ndisc_ifinfo_sysctl_change, + NULL); addrconf_sysctl_register(idev, &idev->cnf); } #endif @@ -1960,6 +2330,9 @@ static int addrconf_ifdown(struct net_device *dev, int how) ASSERT_RTNL(); + if (dev == &loopback_dev && how == 1) + how = 0; + rt6_ifdown(dev); neigh_ifdown(&nd_tbl, dev); @@ -1975,6 +2348,10 @@ static int addrconf_ifdown(struct net_device *dev, int how) dev->ip6_ptr = NULL; idev->dead = 1; write_unlock_bh(&addrconf_lock); + + /* Step 1.5: remove snmp6 entry */ + snmp6_unregister_dev(idev); + } /* Step 2: clear hash table */ @@ -1999,7 +2376,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* Step 3: clear flags for stateless addrconf */ if (how != 1) - idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD); + idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); /* Step 4: clear address list */ #ifdef CONFIG_IPV6_PRIVACY @@ -2030,7 +2407,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) addrconf_del_timer(ifa); write_unlock_bh(&idev->lock); - ipv6_ifa_notify(RTM_DELADDR, ifa); + __ipv6_ifa_notify(RTM_DELADDR, ifa); in6_ifa_put(ifa); write_lock_bh(&idev->lock); @@ -2046,7 +2423,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* Step 5: netlink notification of this interface */ idev->tstamp = jiffies; - inet6_ifinfo_notify(RTM_NEWLINK, idev); + inet6_ifinfo_notify(RTM_DELLINK, idev); /* Shot the device (if unregistered) */ @@ -2056,6 +2433,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) neigh_sysctl_unregister(idev->nd_parms); #endif neigh_parms_release(&nd_tbl, idev->nd_parms); + neigh_ifdown(&nd_tbl, dev); in6_dev_put(idev); } return 0; @@ -2091,21 +2469,13 @@ static void addrconf_rs_timer(unsigned long data) ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers); } else { - struct in6_rtmsg rtmsg; - spin_unlock(&ifp->lock); - + /* + * Note: we do not support deprecated "all on-link" + * assumption any longer. + */ printk(KERN_DEBUG "%s: no IPv6 routers present\n", ifp->idev->dev->name); - - memset(&rtmsg, 0, sizeof(struct in6_rtmsg)); - rtmsg.rtmsg_type = RTMSG_NEWROUTE; - rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; - rtmsg.rtmsg_flags = (RTF_ALLONLINK | RTF_DEFAULT | RTF_UP); - - rtmsg.rtmsg_ifindex = ifp->idev->dev->ifindex; - - ip6_route_add(&rtmsg, NULL, NULL); } out: @@ -2115,12 +2485,20 @@ out: /* * Duplicate Address Detection */ -static void addrconf_dad_start(struct inet6_ifaddr *ifp, int flags) +static void addrconf_dad_kick(struct inet6_ifaddr *ifp) { - struct net_device *dev; unsigned long rand_num; + struct inet6_dev *idev = ifp->idev; + + rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1); + ifp->probes = idev->cnf.dad_transmits; + addrconf_mod_timer(ifp, AC_DAD, rand_num); +} - dev = ifp->idev->dev; +static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) +{ + struct inet6_dev *idev = ifp->idev; + struct net_device *dev = idev->dev; addrconf_join_solict(dev, &ifp->addr); @@ -2129,31 +2507,53 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, int flags) flags); net_srandom(ifp->addr.s6_addr32[3]); - rand_num = net_random() % (ifp->idev->cnf.rtr_solicit_delay ? : 1); + read_lock_bh(&idev->lock); + if (ifp->dead) + goto out; spin_lock_bh(&ifp->lock); if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || + !(dev->flags&IFF_MULTICAST) || !(ifp->flags&IFA_F_TENTATIVE)) { ifp->flags &= ~IFA_F_TENTATIVE; spin_unlock_bh(&ifp->lock); + read_unlock_bh(&idev->lock); addrconf_dad_completed(ifp); return; } - ifp->probes = ifp->idev->cnf.dad_transmits; - addrconf_mod_timer(ifp, AC_DAD, rand_num); - + if (!(idev->if_flags & IF_READY)) { + spin_unlock_bh(&ifp->lock); + read_unlock_bh(&idev->lock); + /* + * If the defice is not ready: + * - keep it tentative if it is a permanent address. + * - otherwise, kill it. + */ + in6_ifa_hold(ifp); + addrconf_dad_stop(ifp); + return; + } + addrconf_dad_kick(ifp); spin_unlock_bh(&ifp->lock); +out: + read_unlock_bh(&idev->lock); } static void addrconf_dad_timer(unsigned long data) { struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; + struct inet6_dev *idev = ifp->idev; struct in6_addr unspec; struct in6_addr mcaddr; + read_lock_bh(&idev->lock); + if (idev->dead) { + read_unlock_bh(&idev->lock); + goto out; + } spin_lock_bh(&ifp->lock); if (ifp->probes == 0) { /* @@ -2162,22 +2562,23 @@ static void addrconf_dad_timer(unsigned long data) ifp->flags &= ~IFA_F_TENTATIVE; spin_unlock_bh(&ifp->lock); + read_unlock_bh(&idev->lock); addrconf_dad_completed(ifp); - in6_ifa_put(ifp); - return; + goto out; } ifp->probes--; addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time); spin_unlock_bh(&ifp->lock); + read_unlock_bh(&idev->lock); /* send a neighbour solicitation for our addr */ memset(&unspec, 0, sizeof(unspec)); addrconf_addr_solict_mult(&ifp->addr, &mcaddr); ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec); - +out: in6_ifa_put(ifp); } @@ -2198,6 +2599,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) if (ifp->idev->cnf.forwarding == 0 && ifp->idev->cnf.rtr_solicits > 0 && (dev->flags&IFF_LOOPBACK) == 0 && + (dev->flags & IFF_MULTICAST) && (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { struct in6_addr all_routers; @@ -2216,14 +2618,22 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval); spin_unlock_bh(&ifp->lock); } +} - if (ifp->idev->cnf.forwarding) { - struct in6_addr addr; +static void addrconf_dad_run(struct inet6_dev *idev) { + struct inet6_ifaddr *ifp; - ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); - if (!ipv6_addr_any(&addr)) - ipv6_dev_ac_inc(ifp->idev->dev, &addr); + read_lock_bh(&idev->lock); + for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) { + spin_lock_bh(&ifp->lock); + if (!(ifp->flags & IFA_F_TENTATIVE)) { + spin_unlock_bh(&ifp->lock); + continue; + } + spin_unlock_bh(&ifp->lock); + addrconf_dad_kick(ifp); } + read_unlock_bh(&idev->lock); } #ifdef CONFIG_PROC_FS @@ -2290,8 +2700,11 @@ static void if6_seq_stop(struct seq_file *seq, void *v) static int if6_seq_show(struct seq_file *seq, void *v) { struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v; - seq_printf(seq, - "%04x%04x%04x%04x%04x%04x%04x%04x %02x %02x %02x %02x %8s\n", + + /* no ipv6 inside a vserver for now */ + if (vx_check(0, VX_ADMIN|VX_WATCH)) + seq_printf(seq, + NIP6_SEQFMT " %02x %02x %02x %02x %8s\n", NIP6(ifp->addr), ifp->idev->dev->ifindex, ifp->prefix_len, @@ -2312,11 +2725,10 @@ static int if6_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; - struct if6_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); + struct if6_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) goto out; - memset(s, 0, sizeof(*s)); rc = seq_open(file, &if6_seq_ops); if (rc) @@ -2371,7 +2783,7 @@ static void addrconf_verify(unsigned long foo) for (i=0; i < IN6_ADDR_HSIZE; i++) { restart: - write_lock(&addrconf_hash_lock); + read_lock(&addrconf_hash_lock); for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) { unsigned long age; #ifdef CONFIG_IPV6_PRIVACY @@ -2390,12 +2802,16 @@ restart: ifp->idev->nd_parms->retrans_time / HZ; #endif - if (age >= ifp->valid_lft) { + if (ifp->valid_lft != INFINITY_LIFE_TIME && + age >= ifp->valid_lft) { spin_unlock(&ifp->lock); in6_ifa_hold(ifp); - write_unlock(&addrconf_hash_lock); + read_unlock(&addrconf_hash_lock); ipv6_del_addr(ifp); goto restart; + } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) { + spin_unlock(&ifp->lock); + continue; } else if (age >= ifp->prefered_lft) { /* jiffies - ifp->tsamp > age >= ifp->prefered_lft */ int deprecate = 0; @@ -2412,7 +2828,7 @@ restart: if (deprecate) { in6_ifa_hold(ifp); - write_unlock(&addrconf_hash_lock); + read_unlock(&addrconf_hash_lock); ipv6_ifa_notify(0, ifp); in6_ifa_put(ifp); @@ -2430,7 +2846,10 @@ restart: in6_ifa_hold(ifp); in6_ifa_hold(ifpub); spin_unlock(&ifp->lock); - write_unlock(&addrconf_hash_lock); + read_unlock(&addrconf_hash_lock); + spin_lock(&ifpub->lock); + ifpub->regen_count = 0; + spin_unlock(&ifpub->lock); ipv6_create_tempaddr(ifpub, ifp); in6_ifa_put(ifpub); in6_ifa_put(ifp); @@ -2447,7 +2866,7 @@ restart: spin_unlock(&ifp->lock); } } - write_unlock(&addrconf_hash_lock); + read_unlock(&addrconf_hash_lock); } addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next; @@ -2469,7 +2888,8 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) pfx = RTA_DATA(rta[IFA_ADDRESS-1]); } if (rta[IFA_LOCAL-1]) { - if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx))) + if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) || + (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))) return -EINVAL; pfx = RTA_DATA(rta[IFA_LOCAL-1]); } @@ -2479,12 +2899,62 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen); } +static int +inet6_addr_modify(int ifindex, struct in6_addr *pfx, + __u32 prefered_lft, __u32 valid_lft) +{ + struct inet6_ifaddr *ifp = NULL; + struct net_device *dev; + int ifa_flags = 0; + + if ((dev = __dev_get_by_index(ifindex)) == NULL) + return -ENODEV; + + if (!(dev->flags&IFF_UP)) + return -ENETDOWN; + + if (!valid_lft || (prefered_lft > valid_lft)) + return -EINVAL; + + ifp = ipv6_get_ifaddr(pfx, dev, 1); + if (ifp == NULL) + return -ENOENT; + + if (valid_lft == INFINITY_LIFE_TIME) + ifa_flags = IFA_F_PERMANENT; + else if (valid_lft >= 0x7FFFFFFF/HZ) + valid_lft = 0x7FFFFFFF/HZ; + + if (prefered_lft == 0) + ifa_flags = IFA_F_DEPRECATED; + else if ((prefered_lft >= 0x7FFFFFFF/HZ) && + (prefered_lft != INFINITY_LIFE_TIME)) + prefered_lft = 0x7FFFFFFF/HZ; + + spin_lock_bh(&ifp->lock); + ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED|IFA_F_PERMANENT)) | ifa_flags; + + ifp->tstamp = jiffies; + ifp->valid_lft = valid_lft; + ifp->prefered_lft = prefered_lft; + + spin_unlock_bh(&ifp->lock); + if (!(ifp->flags&IFA_F_TENTATIVE)) + ipv6_ifa_notify(0, ifp); + in6_ifa_put(ifp); + + addrconf_verify(0); + + return 0; +} + static int inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct rtattr **rta = arg; struct ifaddrmsg *ifm = NLMSG_DATA(nlh); struct in6_addr *pfx; + __u32 valid_lft = INFINITY_LIFE_TIME, prefered_lft = INFINITY_LIFE_TIME; pfx = NULL; if (rta[IFA_ADDRESS-1]) { @@ -2493,26 +2963,50 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) pfx = RTA_DATA(rta[IFA_ADDRESS-1]); } if (rta[IFA_LOCAL-1]) { - if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx))) + if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) || + (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))) return -EINVAL; pfx = RTA_DATA(rta[IFA_LOCAL-1]); } if (pfx == NULL) return -EINVAL; - return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen); + if (rta[IFA_CACHEINFO-1]) { + struct ifa_cacheinfo *ci; + if (RTA_PAYLOAD(rta[IFA_CACHEINFO-1]) < sizeof(*ci)) + return -EINVAL; + ci = RTA_DATA(rta[IFA_CACHEINFO-1]); + valid_lft = ci->ifa_valid; + prefered_lft = ci->ifa_prefered; + } + + if (nlh->nlmsg_flags & NLM_F_REPLACE) { + int ret; + ret = inet6_addr_modify(ifm->ifa_index, pfx, + prefered_lft, valid_lft); + if (ret == 0 || !(nlh->nlmsg_flags & NLM_F_CREATE)) + return ret; + } + + return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, + prefered_lft, valid_lft); + } +/* Maximum length of ifa_cacheinfo attributes */ +#define INET6_IFADDR_RTA_SPACE \ + RTA_SPACE(16) /* IFA_ADDRESS */ + \ + RTA_SPACE(sizeof(struct ifa_cacheinfo)) /* CACHEINFO */ + static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, - u32 pid, u32 seq, int event) + u32 pid, u32 seq, int event, unsigned int flags) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; struct ifa_cacheinfo ci; unsigned char *b = skb->tail; - nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm)); - if (pid) nlh->nlmsg_flags |= NLM_F_MULTI; + nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); ifm = NLMSG_DATA(nlh); ifm->ifa_family = AF_INET6; ifm->ifa_prefixlen = ifa->prefix_len; @@ -2554,15 +3048,14 @@ rtattr_failure: } static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, - u32 pid, u32 seq, int event) + u32 pid, u32 seq, int event, u16 flags) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; struct ifa_cacheinfo ci; unsigned char *b = skb->tail; - nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm)); - if (pid) nlh->nlmsg_flags |= NLM_F_MULTI; + nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); ifm = NLMSG_DATA(nlh); ifm->ifa_family = AF_INET6; ifm->ifa_prefixlen = 128; @@ -2591,15 +3084,14 @@ rtattr_failure: } static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, - u32 pid, u32 seq, int event) + u32 pid, u32 seq, int event, unsigned int flags) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; struct ifa_cacheinfo ci; unsigned char *b = skb->tail; - nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm)); - if (pid) nlh->nlmsg_flags |= NLM_F_MULTI; + nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); ifm = NLMSG_DATA(nlh); ifm->ifa_family = AF_INET6; ifm->ifa_prefixlen = 128; @@ -2646,6 +3138,10 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, struct ifmcaddr6 *ifmca; struct ifacaddr6 *ifaca; + /* no ipv6 inside a vserver for now */ + if (skb->sk && skb->sk->sk_vx_info) + return skb->len; + s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; read_lock(&dev_base_lock); @@ -2661,28 +3157,17 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, read_lock_bh(&idev->lock); switch (type) { case UNICAST_ADDR: - /* unicast address */ + /* unicast address incl. temp addr */ for (ifa = idev->addr_list; ifa; ifa = ifa->if_next, ip_idx++) { if (ip_idx < s_ip_idx) continue; if ((err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0) - goto done; - } - /* temp addr */ -#ifdef CONFIG_IPV6_PRIVACY - for (ifa = idev->tempaddr_list; ifa; - ifa = ifa->tmp_next, ip_idx++) { - if (ip_idx < s_ip_idx) - continue; - if ((err = inet6_fill_ifaddr(skb, ifa, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0) + cb->nlh->nlmsg_seq, RTM_NEWADDR, + NLM_F_MULTI)) <= 0) goto done; } -#endif break; case MULTICAST_ADDR: /* multicast address */ @@ -2692,7 +3177,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, continue; if ((err = inet6_fill_ifmcaddr(skb, ifmca, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_GETMULTICAST)) <= 0) + cb->nlh->nlmsg_seq, RTM_GETMULTICAST, + NLM_F_MULTI)) <= 0) goto done; } break; @@ -2704,7 +3190,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, continue; if ((err = inet6_fill_ifacaddr(skb, ifaca, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_GETANYCAST)) <= 0) + cb->nlh->nlmsg_seq, RTM_GETANYCAST, + NLM_F_MULTI)) <= 0) goto done; } break; @@ -2744,23 +3231,79 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) return inet6_dump_addr(skb, cb, type); } +static int inet6_rtm_getaddr(struct sk_buff *in_skb, + struct nlmsghdr* nlh, void *arg) +{ + struct rtattr **rta = arg; + struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct in6_addr *addr = NULL; + struct net_device *dev = NULL; + struct inet6_ifaddr *ifa; + struct sk_buff *skb; + int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE); + int err; + + if (rta[IFA_ADDRESS-1]) { + if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*addr)) + return -EINVAL; + addr = RTA_DATA(rta[IFA_ADDRESS-1]); + } + if (rta[IFA_LOCAL-1]) { + if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*addr) || + (addr && memcmp(addr, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*addr)))) + return -EINVAL; + addr = RTA_DATA(rta[IFA_LOCAL-1]); + } + if (addr == NULL) + return -EINVAL; + + if (ifm->ifa_index) + dev = __dev_get_by_index(ifm->ifa_index); + + if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) + return -EADDRNOTAVAIL; + + if ((skb = alloc_skb(size, GFP_KERNEL)) == NULL) { + err = -ENOBUFS; + goto out; + } + + NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; + err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, + nlh->nlmsg_seq, RTM_NEWADDR, 0); + if (err < 0) { + err = -EMSGSIZE; + goto out_free; + } + + err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + if (err > 0) + err = 0; +out: + in6_ifa_put(ifa); + return err; +out_free: + kfree_skb(skb); + goto out; +} + static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) { struct sk_buff *skb; - int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128); + int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE); skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { - netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, ENOBUFS); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, ENOBUFS); return; } - if (inet6_fill_ifaddr(skb, ifa, 0, 0, event) < 0) { + if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, EINVAL); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFADDR; - netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFADDR, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFADDR; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFADDR, GFP_ATOMIC); } static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, @@ -2786,10 +3329,30 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor; #endif array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses; + array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr; + array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo; +#ifdef CONFIG_IPV6_ROUTER_PREF + array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref; + array[DEVCONF_RTR_PROBE_INTERVAL] = cnf->rtr_probe_interval; +#ifdef CONFIV_IPV6_ROUTE_INFO + array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; +#endif +#endif } +/* Maximum length of ifinfomsg attributes */ +#define INET6_IFINFO_RTA_SPACE \ + RTA_SPACE(IFNAMSIZ) /* IFNAME */ + \ + RTA_SPACE(MAX_ADDR_LEN) /* ADDRESS */ + \ + RTA_SPACE(sizeof(u32)) /* MTU */ + \ + RTA_SPACE(sizeof(int)) /* LINK */ + \ + RTA_SPACE(0) /* PROTINFO */ + \ + RTA_SPACE(sizeof(u32)) /* FLAGS */ + \ + RTA_SPACE(sizeof(struct ifla_cacheinfo)) /* CACHEINFO */ + \ + RTA_SPACE(sizeof(__s32[DEVCONF_MAX])) /* CONF */ + static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, - u32 pid, u32 seq, int event) + u32 pid, u32 seq, int event, unsigned int flags) { struct net_device *dev = idev->dev; __s32 *array = NULL; @@ -2800,18 +3363,14 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, __u32 mtu = dev->mtu; struct ifla_cacheinfo ci; - nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r)); - if (pid) nlh->nlmsg_flags |= NLM_F_MULTI; + nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags); r = NLMSG_DATA(nlh); r->ifi_family = AF_INET6; + r->__ifi_pad = 0; r->ifi_type = dev->type; r->ifi_index = dev->ifindex; - r->ifi_flags = dev->flags; + r->ifi_flags = dev_get_flags(dev); r->ifi_change = 0; - if (!netif_running(dev) || !netif_carrier_ok(dev)) - r->ifi_flags &= ~IFF_RUNNING; - else - r->ifi_flags |= IFF_RUNNING; RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name); @@ -2852,8 +3411,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, nlmsg_failure: rtattr_failure: - if (array) - kfree(array); + kfree(array); skb_trim(skb, b - skb->data); return -1; } @@ -2865,6 +3423,10 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; struct inet6_dev *idev; + /* no ipv6 inside a vserver for now */ + if (skb->sk && skb->sk->sk_vx_info) + return skb->len; + read_lock(&dev_base_lock); for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { if (idx < s_idx) @@ -2872,7 +3434,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if ((idev = in6_dev_get(dev)) == NULL) continue; err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_NEWLINK); + cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI); in6_dev_put(idev); if (err <= 0) break; @@ -2886,41 +3448,45 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) void inet6_ifinfo_notify(int event, struct inet6_dev *idev) { struct sk_buff *skb; - /* 128 bytes ?? */ - int size = NLMSG_SPACE(sizeof(struct ifinfomsg)+128); + int size = NLMSG_SPACE(sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE); skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { - netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, ENOBUFS); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, ENOBUFS); return; } - if (inet6_fill_ifinfo(skb, idev, 0, 0, event) < 0) { + if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, EINVAL); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFINFO; - netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFINFO, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFINFO; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFINFO, GFP_ATOMIC); } +/* Maximum length of prefix_cacheinfo attributes */ +#define INET6_PREFIX_RTA_SPACE \ + RTA_SPACE(sizeof(((struct prefix_info *)NULL)->prefix)) /* ADDRESS */ + \ + RTA_SPACE(sizeof(struct prefix_cacheinfo)) /* CACHEINFO */ + static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, - struct prefix_info *pinfo, u32 pid, u32 seq, int event) + struct prefix_info *pinfo, u32 pid, u32 seq, + int event, unsigned int flags) { struct prefixmsg *pmsg; struct nlmsghdr *nlh; unsigned char *b = skb->tail; struct prefix_cacheinfo ci; - nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*pmsg)); - - if (pid) - nlh->nlmsg_flags |= NLM_F_MULTI; - + nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*pmsg), flags); pmsg = NLMSG_DATA(nlh); pmsg->prefix_family = AF_INET6; + pmsg->prefix_pad1 = 0; + pmsg->prefix_pad2 = 0; pmsg->prefix_ifindex = idev->dev->ifindex; pmsg->prefix_len = pinfo->prefix_len; pmsg->prefix_type = pinfo->type; + pmsg->prefix_pad3 = 0; pmsg->prefix_flags = 0; if (pinfo->onlink) @@ -2947,27 +3513,28 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, struct prefix_info *pinfo) { struct sk_buff *skb; - int size = NLMSG_SPACE(sizeof(struct prefixmsg)+128); + int size = NLMSG_SPACE(sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE); skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { - netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, ENOBUFS); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, ENOBUFS); return; } - if (inet6_fill_prefix(skb, idev, pinfo, 0, 0, event) < 0) { + if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, EINVAL); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_PREFIX; - netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_PREFIX, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_PREFIX; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_PREFIX, GFP_ATOMIC); } -static struct rtnetlink_link inet6_rtnetlink_table[RTM_MAX - RTM_BASE + 1] = { +static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { [RTM_GETLINK - RTM_BASE] = { .dumpit = inet6_dump_ifinfo, }, [RTM_NEWADDR - RTM_BASE] = { .doit = inet6_rtm_newaddr, }, [RTM_DELADDR - RTM_BASE] = { .doit = inet6_rtm_deladdr, }, - [RTM_GETADDR - RTM_BASE] = { .dumpit = inet6_dump_ifaddr, }, + [RTM_GETADDR - RTM_BASE] = { .doit = inet6_rtm_getaddr, + .dumpit = inet6_dump_ifaddr, }, [RTM_GETMULTICAST - RTM_BASE] = { .dumpit = inet6_dump_ifmcaddr, }, [RTM_GETANYCAST - RTM_BASE] = { .dumpit = inet6_dump_ifacaddr, }, [RTM_NEWROUTE - RTM_BASE] = { .doit = inet6_rtm_newroute, }, @@ -2976,64 +3543,71 @@ static struct rtnetlink_link inet6_rtnetlink_table[RTM_MAX - RTM_BASE + 1] = { .dumpit = inet6_dump_fib, }, }; -static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) +static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { inet6_ifa_notify(event ? : RTM_NEWADDR, ifp); switch (event) { case RTM_NEWADDR: - ip6_rt_addr_add(&ifp->addr, ifp->idev->dev, 0); + ip6_ins_rt(ifp->rt, NULL, NULL, NULL); + if (ifp->idev->cnf.forwarding) + addrconf_join_anycast(ifp); break; case RTM_DELADDR: - addrconf_leave_solict(ifp->idev->dev, &ifp->addr); - if (ifp->idev->cnf.forwarding) { - struct in6_addr addr; - - ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); - if (!ipv6_addr_any(&addr)) - ipv6_dev_ac_dec(ifp->idev->dev, &addr); - } - if (!ipv6_chk_addr(&ifp->addr, ifp->idev->dev, 1)) - ip6_rt_addr_del(&ifp->addr, ifp->idev->dev); + if (ifp->idev->cnf.forwarding) + addrconf_leave_anycast(ifp); + addrconf_leave_solict(ifp->idev, &ifp->addr); + dst_hold(&ifp->rt->u.dst); + if (ip6_del_rt(ifp->rt, NULL, NULL, NULL)) + dst_free(&ifp->rt->u.dst); break; } } +static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) +{ + read_lock_bh(&addrconf_lock); + if (likely(ifp->idev->dead == 0)) + __ipv6_ifa_notify(event, ifp); + read_unlock_bh(&addrconf_lock); +} + #ifdef CONFIG_SYSCTL static int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, - void *buffer, size_t *lenp) + void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; int ret; - ret = proc_dointvec(ctl, write, filp, buffer, lenp); - - if (write && *valp != val && valp != &ipv6_devconf_dflt.forwarding) { - struct inet6_dev *idev = NULL; + ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + if (write && valp != &ipv6_devconf_dflt.forwarding) { if (valp != &ipv6_devconf.forwarding) { - idev = (struct inet6_dev *)ctl->extra1; - if (idev == NULL) - return ret; - } else + if ((!*valp) ^ (!val)) { + struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1; + if (idev == NULL) + return ret; + dev_forward_change(idev); + } + } else { ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding; - - addrconf_forward_change(idev); - + addrconf_forward_change(); + } if (*valp) - rt6_purge_dflt_routers(0); + rt6_purge_dflt_routers(); } return ret; } static int addrconf_sysctl_forward_strategy(ctl_table *table, - int *name, int nlen, - void *oldval, size_t *oldlenp, - void *newval, size_t newlen, + int __user *name, int nlen, + void __user *oldval, + size_t __user *oldlenp, + void __user *newval, size_t newlen, void **context) { int *valp = table->data; @@ -3043,7 +3617,7 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table, return 0; if (newlen != sizeof(int)) return -EINVAL; - if (get_user(new, (int *)newval)) + if (get_user(new, (int __user *)newval)) return -EFAULT; if (new == *valp) return 0; @@ -3062,18 +3636,22 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table, } if (valp != &ipv6_devconf_dflt.forwarding) { - struct inet6_dev *idev; if (valp != &ipv6_devconf.forwarding) { - idev = (struct inet6_dev *)table->extra1; + struct inet6_dev *idev = (struct inet6_dev *)table->extra1; + int changed; if (unlikely(idev == NULL)) return -ENODEV; - } else - idev = NULL; - *valp = new; - addrconf_forward_change(idev); + changed = (!*valp) ^ (!new); + *valp = new; + if (changed) + dev_forward_change(idev); + } else { + *valp = new; + addrconf_forward_change(); + } if (*valp) - rt6_purge_dflt_routers(0); + rt6_purge_dflt_routers(); } else *valp = new; @@ -3083,7 +3661,7 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table, static struct addrconf_sysctl_table { struct ctl_table_header *sysctl_header; - ctl_table addrconf_vars[18]; + ctl_table addrconf_vars[__NET_IPV6_MAX]; ctl_table addrconf_dev[2]; ctl_table addrconf_conf_dir[2]; ctl_table addrconf_proto_dir[2]; @@ -3232,6 +3810,51 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = NET_IPV6_ACCEPT_RA_DEFRTR, + .procname = "accept_ra_defrtr", + .data = &ipv6_devconf.accept_ra_defrtr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_IPV6_ACCEPT_RA_PINFO, + .procname = "accept_ra_pinfo", + .data = &ipv6_devconf.accept_ra_pinfo, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#ifdef CONFIG_IPV6_ROUTER_PREF + { + .ctl_name = NET_IPV6_ACCEPT_RA_RTR_PREF, + .procname = "accept_ra_rtr_pref", + .data = &ipv6_devconf.accept_ra_rtr_pref, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_IPV6_RTR_PROBE_INTERVAL, + .procname = "router_probe_interval", + .data = &ipv6_devconf.rtr_probe_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, +#ifdef CONFIV_IPV6_ROUTE_INFO + { + .ctl_name = NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN, + .procname = "accept_ra_rt_info_max_plen", + .data = &ipv6_devconf.accept_ra_rt_info_max_plen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif +#endif { .ctl_name = 0, /* sentinel */ } @@ -3311,7 +3934,7 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf * by sysctl and we wouldn't want anyone to change it under our feet * (see SIOCSIFNAME). */ - dev_name = net_sysctl_strdup(dev_name); + dev_name = kstrdup(dev_name, GFP_KERNEL); if (!dev_name) goto free; @@ -3362,28 +3985,50 @@ static void addrconf_sysctl_unregister(struct ipv6_devconf *p) int register_inet6addr_notifier(struct notifier_block *nb) { - return notifier_chain_register(&inet6addr_chain, nb); + return atomic_notifier_chain_register(&inet6addr_chain, nb); } int unregister_inet6addr_notifier(struct notifier_block *nb) { - return notifier_chain_unregister(&inet6addr_chain,nb); + return atomic_notifier_chain_unregister(&inet6addr_chain,nb); } /* * Init / cleanup code */ -void __init addrconf_init(void) +int __init addrconf_init(void) { - register_netdevice_notifier(&ipv6_dev_notf); + int err = 0; -#ifdef CONFIG_IPV6_PRIVACY - md5_tfm = crypto_alloc_tfm("md5", 0); - if (unlikely(md5_tfm == NULL)) - printk(KERN_WARNING - "failed to load transform for md5\n"); -#endif + /* The addrconf netdev notifier requires that loopback_dev + * has it's ipv6 private information allocated and setup + * before it can bring up and give link-local addresses + * to other devices which are up. + * + * Unfortunately, loopback_dev is not necessarily the first + * entry in the global dev_base list of net devices. In fact, + * it is likely to be the very last entry on that list. + * So this causes the notifier registry below to try and + * give link-local addresses to all devices besides loopback_dev + * first, then loopback_dev, which cases all the non-loopback_dev + * devices to fail to get a link-local address. + * + * So, as a temporary fix, allocate the ipv6 structure for + * loopback_dev first by hand. + * Longer term, all of the dependencies ipv6 has upon the loopback + * device and it being up should be removed. + */ + rtnl_lock(); + if (!ipv6_add_dev(&loopback_dev)) + err = -ENOMEM; + rtnl_unlock(); + if (err) + return err; + + ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev); + + register_netdevice_notifier(&ipv6_dev_notf); addrconf_verify(0); rtnetlink_links[PF_INET6] = inet6_rtnetlink_table; @@ -3392,6 +4037,8 @@ void __init addrconf_init(void) register_sysctl_table(addrconf_sysctl.addrconf_root_dir, 0); addrconf_sysctl_register(NULL, &ipv6_devconf_dflt); #endif + + return 0; } void __exit addrconf_cleanup(void) @@ -3420,6 +4067,7 @@ void __exit addrconf_cleanup(void) continue; addrconf_ifdown(dev, 1); } + addrconf_ifdown(&loopback_dev, 2); /* * Check hash table. @@ -3444,13 +4092,6 @@ void __exit addrconf_cleanup(void) rtnl_unlock(); -#ifdef CONFIG_IPV6_PRIVACY - if (likely(md5_tfm != NULL)) { - crypto_free_tfm(md5_tfm); - md5_tfm = NULL; - } -#endif - #ifdef CONFIG_PROC_FS proc_net_remove("if_inet6"); #endif