vserver 1.9.3
[linux-2.6.git] / net / ipv4 / devinet.c
index c9cd619..cc5dd56 100644 (file)
@@ -88,31 +88,31 @@ static void devinet_sysctl_register(struct in_device *in_dev,
 static void devinet_sysctl_unregister(struct ipv4_devconf *p);
 #endif
 
-int inet_ifa_count;
-int inet_dev_count;
-
 /* Locks all the inet devices. */
 
-rwlock_t inetdev_lock = RW_LOCK_UNLOCKED;
-
 static struct in_ifaddr *inet_alloc_ifa(void)
 {
        struct in_ifaddr *ifa = kmalloc(sizeof(*ifa), GFP_KERNEL);
 
        if (ifa) {
                memset(ifa, 0, sizeof(*ifa));
-               inet_ifa_count++;
+               INIT_RCU_HEAD(&ifa->rcu_head);
        }
 
        return ifa;
 }
 
-static __inline__ void inet_free_ifa(struct in_ifaddr *ifa)
+static void inet_rcu_free_ifa(struct rcu_head *head)
 {
+       struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
        if (ifa->ifa_dev)
-               __in_dev_put(ifa->ifa_dev);
+               in_dev_put(ifa->ifa_dev);
        kfree(ifa);
-       inet_ifa_count--;
+}
+
+static inline void inet_free_ifa(struct in_ifaddr *ifa)
+{
+       call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
 }
 
 void in_dev_finish_destroy(struct in_device *idev)
@@ -129,7 +129,6 @@ void in_dev_finish_destroy(struct in_device *idev)
        if (!idev->dead)
                printk("Freeing alive in_device %p\n", idev);
        else {
-               inet_dev_count--;
                kfree(idev);
        }
 }
@@ -144,24 +143,24 @@ struct in_device *inetdev_init(struct net_device *dev)
        if (!in_dev)
                goto out;
        memset(in_dev, 0, sizeof(*in_dev));
-       in_dev->lock = RW_LOCK_UNLOCKED;
+       INIT_RCU_HEAD(&in_dev->rcu_head);
        memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
        in_dev->cnf.sysctl = NULL;
        in_dev->dev = dev;
        if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
                goto out_kfree;
-       inet_dev_count++;
        /* Reference in_dev->dev */
        dev_hold(dev);
 #ifdef CONFIG_SYSCTL
        neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
                              NET_IPV4_NEIGH, "ipv4", NULL);
 #endif
-       write_lock_bh(&inetdev_lock);
-       dev->ip_ptr = in_dev;
+
        /* Account for reference dev->ip_ptr */
        in_dev_hold(in_dev);
-       write_unlock_bh(&inetdev_lock);
+       smp_wmb();
+       dev->ip_ptr = in_dev;
+
 #ifdef CONFIG_SYSCTL
        devinet_sysctl_register(in_dev, &in_dev->cnf);
 #endif
@@ -176,9 +175,16 @@ out_kfree:
        goto out;
 }
 
+/* RCU callback: release one reference on the in_device that embeds @head. */
+static void in_dev_rcu_put(struct rcu_head *head)
+{
+       in_dev_put(container_of(head, struct in_device, rcu_head));
+}
+
 static void inetdev_destroy(struct in_device *in_dev)
 {
        struct in_ifaddr *ifa;
+       struct net_device *dev;
 
        ASSERT_RTNL();
 
@@ -194,30 +200,31 @@ static void inetdev_destroy(struct in_device *in_dev)
 #ifdef CONFIG_SYSCTL
        devinet_sysctl_unregister(&in_dev->cnf);
 #endif
-       write_lock_bh(&inetdev_lock);
-       in_dev->dev->ip_ptr = NULL;
-       /* in_dev_put following below will kill the in_device */
-       write_unlock_bh(&inetdev_lock);
+
+       dev = in_dev->dev;
+       dev->ip_ptr = NULL;
 
 #ifdef CONFIG_SYSCTL
        neigh_sysctl_unregister(in_dev->arp_parms);
 #endif
        neigh_parms_release(&arp_tbl, in_dev->arp_parms);
-       in_dev_put(in_dev);
+       arp_ifdown(dev);
+
+       call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
 }
 
 int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b)
 {
-       read_lock(&in_dev->lock);
+       rcu_read_lock();
        for_primary_ifa(in_dev) {
                if (inet_ifa_match(a, ifa)) {
                        if (!b || inet_ifa_match(b, ifa)) {
-                               read_unlock(&in_dev->lock);
+                               rcu_read_unlock();
                                return 1;
                        }
                }
        } endfor_ifa(in_dev);
-       read_unlock(&in_dev->lock);
+       rcu_read_unlock();
        return 0;
 }
 
@@ -241,9 +248,8 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                                ifap1 = &ifa->ifa_next;
                                continue;
                        }
-                       write_lock_bh(&in_dev->lock);
+
                        *ifap1 = ifa->ifa_next;
-                       write_unlock_bh(&in_dev->lock);
 
                        rtmsg_ifa(RTM_DELADDR, ifa);
                        notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa);
@@ -253,9 +259,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 
        /* 2. Unlink it */
 
-       write_lock_bh(&in_dev->lock);
        *ifap = ifa1->ifa_next;
-       write_unlock_bh(&in_dev->lock);
 
        /* 3. Announce address deletion */
 
@@ -317,9 +321,7 @@ static int inet_insert_ifa(struct in_ifaddr *ifa)
        }
 
        ifa->ifa_next = *ifap;
-       write_lock_bh(&in_dev->lock);
        *ifap = ifa;
-       write_unlock_bh(&in_dev->lock);
 
        /* Send message first, then call notifier.
           Notifier will trigger FIB update, so that
@@ -488,8 +490,35 @@ static __inline__ int inet_abc_len(u32 addr)
        return rc;
 }
 
+/*
+       Check that an interface address is not part of the ipv4root assigned
+       to the current process. Return true if this is the case.
 
-int devinet_ioctl(unsigned int cmd, void *arg)
+       If the process is not bound to a specific IP, then it returns 0 (all
+       interfaces are fine).
+*/
+static inline int devinet_notiproot (struct in_ifaddr *ifa)
+{
+       struct nx_info *nxi = current->nx_info;
+       __u32 addr;
+       int idx, count;
+
+       /* No nx_info on the current task: no restriction applies. */
+       if (!nxi)
+               return 0;
+
+       count = nxi->nbipv4;
+       addr = ifa->ifa_local;
+       for (idx = 0; idx < count; idx++) {
+               if (nxi->ipv4[idx] == addr)
+                       return 0;       /* found in the context's list */
+       }
+
+       return 1;
+}
+
+
+int devinet_ioctl(unsigned int cmd, void __user *arg)
 {
        struct ifreq ifr;
        struct sockaddr_in sin_orig;
@@ -595,6 +624,9 @@ int devinet_ioctl(unsigned int cmd, void *arg)
        ret = -EADDRNOTAVAIL;
        if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
                goto done;
+       if (vx_flags(VXF_HIDE_NETIF, 0) &&
+               !ifa_in_nx_info(ifa, current->nx_info))
+               goto done;
 
        switch(cmd) {
        case SIOCGIFADDR:       /* Get interface address */
@@ -699,6 +731,20 @@ int devinet_ioctl(unsigned int cmd, void *arg)
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_mask = sin->sin_addr.s_addr;
                        ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
+
+                       /* If the broadcast address matches the netmask,
+                        * recalculate it; otherwise it's a funny address
+                        * the user set on purpose, so don't touch it.
+                        * NOTE(review): ifa_mask was overwritten above, so
+                        * this compares against the new mask - confirm.
+                        */
+                       if ((dev->flags & IFF_BROADCAST) &&
+                           (ifa->ifa_prefixlen < 31) &&
+                           (ifa->ifa_broadcast ==
+                            (ifa->ifa_local|~ifa->ifa_mask))) {
+                               ifa->ifa_broadcast = (ifa->ifa_local |
+                                                     ~sin->sin_addr.s_addr);
+                       }
                        inet_insert_ifa(ifa);
                }
                break;
@@ -713,7 +759,7 @@ rarok:
        goto out;
 }
 
-static int inet_gifconf(struct net_device *dev, char *buf, int len)
+static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
 {
        struct in_device *in_dev = __in_dev_get(dev);
        struct in_ifaddr *ifa;
@@ -724,6 +770,9 @@ static int inet_gifconf(struct net_device *dev, char *buf, int len)
                goto out;
 
        for (; ifa; ifa = ifa->ifa_next) {
+               if (vx_flags(VXF_HIDE_NETIF, 0) &&
+                       !ifa_in_nx_info(ifa, current->nx_info))
+                       continue;
                if (!buf) {
                        done += sizeof(ifr);
                        continue;
@@ -757,12 +806,11 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope)
        u32 addr = 0;
        struct in_device *in_dev;
 
-       read_lock(&inetdev_lock);
+       rcu_read_lock();
        in_dev = __in_dev_get(dev);
        if (!in_dev)
-               goto out_unlock_inetdev;
+               goto no_in_dev;
 
-       read_lock(&in_dev->lock);
        for_primary_ifa(in_dev) {
                if (ifa->ifa_scope > scope)
                        continue;
@@ -773,8 +821,8 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope)
                if (!addr)
                        addr = ifa->ifa_local;
        } endfor_ifa(in_dev);
-       read_unlock(&in_dev->lock);
-       read_unlock(&inetdev_lock);
+no_in_dev:
+       rcu_read_unlock();
 
        if (addr)
                goto out;
@@ -784,30 +832,24 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope)
           in dev_base list.
         */
        read_lock(&dev_base_lock);
-       read_lock(&inetdev_lock);
+       rcu_read_lock();
        for (dev = dev_base; dev; dev = dev->next) {
                if ((in_dev = __in_dev_get(dev)) == NULL)
                        continue;
 
-               read_lock(&in_dev->lock);
                for_primary_ifa(in_dev) {
                        if (ifa->ifa_scope != RT_SCOPE_LINK &&
                            ifa->ifa_scope <= scope) {
-                               read_unlock(&in_dev->lock);
                                addr = ifa->ifa_local;
                                goto out_unlock_both;
                        }
                } endfor_ifa(in_dev);
-               read_unlock(&in_dev->lock);
        }
 out_unlock_both:
-       read_unlock(&inetdev_lock);
        read_unlock(&dev_base_lock);
+       rcu_read_unlock();
 out:
        return addr;
-out_unlock_inetdev:
-       read_unlock(&inetdev_lock);
-       goto out;
 }
 
 static u32 confirm_addr_indev(struct in_device *in_dev, u32 dst,
@@ -860,29 +902,24 @@ u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scop
        struct in_device *in_dev;
 
        if (dev) {
-               read_lock(&inetdev_lock);
-               if ((in_dev = __in_dev_get(dev))) {
-                       read_lock(&in_dev->lock);
+               rcu_read_lock();
+               if ((in_dev = __in_dev_get(dev)))
                        addr = confirm_addr_indev(in_dev, dst, local, scope);
-                       read_unlock(&in_dev->lock);
-               }
-               read_unlock(&inetdev_lock);
+               rcu_read_unlock();
 
                return addr;
        }
 
        read_lock(&dev_base_lock);
-       read_lock(&inetdev_lock);
+       rcu_read_lock();
        for (dev = dev_base; dev; dev = dev->next) {
                if ((in_dev = __in_dev_get(dev))) {
-                       read_lock(&in_dev->lock);
                        addr = confirm_addr_indev(in_dev, dst, local, scope);
-                       read_unlock(&in_dev->lock);
                        if (addr)
                                break;
                }
        }
-       read_unlock(&inetdev_lock);
+       rcu_read_unlock();
        read_unlock(&dev_base_lock);
 
        return addr;
@@ -1042,6 +1079,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
        struct net_device *dev;
        struct in_device *in_dev;
        struct in_ifaddr *ifa;
+       struct sock *sk = skb->sk;
        int s_ip_idx, s_idx = cb->args[0];
 
        s_ip_idx = ip_idx = cb->args[1];
@@ -1051,26 +1089,27 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
                        continue;
                if (idx > s_idx)
                        s_ip_idx = 0;
-               read_lock(&inetdev_lock);
+               rcu_read_lock();
                if ((in_dev = __in_dev_get(dev)) == NULL) {
-                       read_unlock(&inetdev_lock);
+                       rcu_read_unlock();
                        continue;
                }
-               read_lock(&in_dev->lock);
+
                for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
                     ifa = ifa->ifa_next, ip_idx++) {
+                       if (sk && vx_info_flags(sk->sk_vx_info, VXF_HIDE_NETIF, 0) &&
+                               !ifa_in_nx_info(ifa, sk->sk_nx_info))
+                               continue;
                        if (ip_idx < s_ip_idx)
                                continue;
                        if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
                                             cb->nlh->nlmsg_seq,
                                             RTM_NEWADDR) <= 0) {
-                               read_unlock(&in_dev->lock);
-                               read_unlock(&inetdev_lock);
+                               rcu_read_unlock();
                                goto done;
                        }
                }
-               read_unlock(&in_dev->lock);
-               read_unlock(&inetdev_lock);
+               rcu_read_unlock();
        }
 
 done:
@@ -1124,11 +1163,11 @@ void inet_forward_change(void)
        read_lock(&dev_base_lock);
        for (dev = dev_base; dev; dev = dev->next) {
                struct in_device *in_dev;
-               read_lock(&inetdev_lock);
+               rcu_read_lock();
                in_dev = __in_dev_get(dev);
                if (in_dev)
                        in_dev->cnf.forwarding = on;
-               read_unlock(&inetdev_lock);
+               rcu_read_unlock();
        }
        read_unlock(&dev_base_lock);
 
@@ -1136,12 +1175,12 @@ void inet_forward_change(void)
 }
 
 static int devinet_sysctl_forward(ctl_table *ctl, int write,
-                                 struct file* filp, void *buffer,
-                                 size_t *lenp)
+                                 struct file* filp, void __user *buffer,
+                                 size_t *lenp, loff_t *ppos)
 {
        int *valp = ctl->data;
        int val = *valp;
-       int ret = proc_dointvec(ctl, write, filp, buffer, lenp);
+       int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
 
        if (write && *valp != val) {
                if (valp == &ipv4_devconf.forwarding)
@@ -1154,12 +1193,12 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
 }
 
 int ipv4_doint_and_flush(ctl_table *ctl, int write,
-                        struct file* filp, void *buffer,
-                        size_t *lenp)
+                        struct file* filp, void __user *buffer,
+                        size_t *lenp, loff_t *ppos)
 {
        int *valp = ctl->data;
        int val = *valp;
-       int ret = proc_dointvec(ctl, write, filp, buffer, lenp);
+       int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
 
        if (write && *valp != val)
                rt_cache_flush(0);
@@ -1167,9 +1206,9 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write,
        return ret;
 }
 
-int ipv4_doint_and_flush_strategy(ctl_table *table, int *name, int nlen,
-                                 void *oldval, size_t *oldlenp,
-                                 void *newval, size_t newlen, 
+int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
+                                 void __user *oldval, size_t __user *oldlenp,
+                                 void __user *newval, size_t newlen, 
                                  void **context)
 {
        int *valp = table->data;
@@ -1181,7 +1220,7 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int *name, int nlen,
        if (newlen != sizeof(int))
                return -EINVAL;
 
-       if (get_user(new, (int *)newval))
+       if (get_user(new, (int __user *)newval))
                return -EFAULT;
 
        if (new == *valp)
@@ -1494,6 +1533,5 @@ EXPORT_SYMBOL(devinet_ioctl);
 EXPORT_SYMBOL(in_dev_finish_destroy);
 EXPORT_SYMBOL(inet_select_addr);
 EXPORT_SYMBOL(inetdev_by_index);
-EXPORT_SYMBOL(inetdev_lock);
 EXPORT_SYMBOL(register_inetaddr_notifier);
 EXPORT_SYMBOL(unregister_inetaddr_notifier);