*
* Virtual Server: Network Support
*
- * Copyright (C) 2003-2004 Herbert Pötzl
+ * Copyright (C) 2003-2005 Herbert Pötzl
*
* V0.01 broken out from vcontext V0.05
* V0.02 cleaned up implementation
#include <linux/config.h>
#include <linux/slab.h>
-#include <linux/vserver.h>
-#include <linux/vs_base.h>
-#include <linux/vs_network.h>
+#include <linux/vserver/network_cmd.h>
#include <linux/rcupdate.h>
+#include <net/tcp.h>
#include <asm/errno.h>
static struct nx_info *__alloc_nx_info(nid_t nid)
{
struct nx_info *new = NULL;
-
- nxdprintk("alloc_nx_info()\n");
+
+ vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);
/* would this benefit from a slab cache? */
new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
if (!new)
return 0;
-
+
memset (new, 0, sizeof(struct nx_info));
new->nx_id = nid;
INIT_RCU_HEAD(&new->nx_rcu);
atomic_set(&new->nx_usecnt, 0);
/* rest of init goes here */
-
- nxdprintk("alloc_nx_info() = %p\n", new);
+
+ vxdprintk(VXD_CBIT(nid, 0),
+ "alloc_nx_info() = %p", new);
return new;
}
static void __dealloc_nx_info(struct nx_info *nxi)
{
- nxdprintk("dealloc_nx_info(%p)\n", nxi);
+ vxdprintk(VXD_CBIT(nid, 0),
+ "dealloc_nx_info(%p)", nxi);
nxi->nx_hlist.next = LIST_POISON1;
nxi->nx_id = -1;
-
+
BUG_ON(atomic_read(&nxi->nx_usecnt));
BUG_ON(atomic_read(&nxi->nx_refcnt));
kfree(nxi);
}
+static inline int __free_nx_info(struct nx_info *nxi)
+{
+ int usecnt, refcnt;
+
+ BUG_ON(!nxi);
+
+ usecnt = atomic_read(&nxi->nx_usecnt);
+ BUG_ON(usecnt < 0);
+
+ refcnt = atomic_read(&nxi->nx_refcnt);
+ BUG_ON(refcnt < 0);
+
+ if (!usecnt)
+ __dealloc_nx_info(nxi);
+ return usecnt;
+}
+
+/* exported stuff */
+
+void free_nx_info(struct nx_info *nxi)
+{
+ /* context shutdown is mandatory */
+ // BUG_ON(nxi->nx_state != NXS_SHUTDOWN);
+
+ // BUG_ON(nxi->nx_state & NXS_HASHED);
+
+ BUG_ON(__free_nx_info(nxi));
+}
+
/* hash table for nx_info hash */
-#define NX_HASH_SIZE 13
+#define NX_HASH_SIZE 13
struct hlist_head nx_info_hash[NX_HASH_SIZE];
static inline void __hash_nx_info(struct nx_info *nxi)
{
struct hlist_head *head;
-
- nxdprintk("__hash_nx_info: %p[#%d]\n", nxi, nxi->nx_id);
+
+ vxdprintk(VXD_CBIT(nid, 4),
+ "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);
get_nx_info(nxi);
head = &nx_info_hash[__hashval(nxi->nx_id)];
- hlist_add_head_rcu(&nxi->nx_hlist, head);
+ hlist_add_head(&nxi->nx_hlist, head);
}
/* __unhash_nx_info()
static inline void __unhash_nx_info(struct nx_info *nxi)
{
- nxdprintk("__unhash_nx_info: %p[#%d]\n", nxi, nxi->nx_id);
- hlist_del_rcu(&nxi->nx_hlist);
+ vxd_assert_lock(&nx_info_hash_lock);
+ vxdprintk(VXD_CBIT(nid, 4),
+ "__unhash_nx_info: %p[#%d]", nxi, nxi->nx_id);
+ hlist_del(&nxi->nx_hlist);
put_nx_info(nxi);
}
/* __lookup_nx_info()
- * requires the rcu_read_lock()
+ * requires the hash_lock to be held
* doesn't increment the nx_refcnt */
static inline struct nx_info *__lookup_nx_info(nid_t nid)
struct hlist_head *head = &nx_info_hash[__hashval(nid)];
struct hlist_node *pos;
- hlist_for_each_rcu(pos, head) {
+ vxd_assert_lock(&nx_info_hash_lock);
+ hlist_for_each(pos, head) {
struct nx_info *nxi =
hlist_entry(pos, struct nx_info, nx_hlist);
{
static nid_t seq = MAX_N_CONTEXT;
nid_t barrier = seq;
-
+
+ vxd_assert_lock(&nx_info_hash_lock);
do {
if (++seq > MAX_N_CONTEXT)
seq = MIN_D_CONTEXT;
- if (!__lookup_nx_info(seq))
+ if (!__lookup_nx_info(seq)) {
+ vxdprintk(VXD_CBIT(nid, 4),
+ "__nx_dynamic_id: [#%d]", seq);
return seq;
+ }
} while (barrier != seq);
return 0;
}
static struct nx_info * __loc_nx_info(int id, int *err)
{
struct nx_info *new, *nxi = NULL;
-
- nxdprintk("loc_nx_info(%d)\n", id);
+
+ vxdprintk(VXD_CBIT(nid, 1), "loc_nx_info(%d)*", id);
if (!(new = __alloc_nx_info(id))) {
*err = -ENOMEM;
return NULL;
}
+ /* required to make dynamic xids unique */
spin_lock(&nx_info_hash_lock);
/* dynamic context requested */
else if ((nxi = __lookup_nx_info(id))) {
/* context in setup is not available */
if (nxi->nx_flags & VXF_STATE_SETUP) {
- nxdprintk("loc_nx_info(%d) = %p (not available)\n", id, nxi);
+ vxdprintk(VXD_CBIT(nid, 0),
+ "loc_nx_info(%d) = %p (not available)", id, nxi);
nxi = NULL;
*err = -EBUSY;
} else {
- nxdprintk("loc_nx_info(%d) = %p (found)\n", id, nxi);
+ vxdprintk(VXD_CBIT(nid, 0),
+ "loc_nx_info(%d) = %p (found)", id, nxi);
get_nx_info(nxi);
*err = 0;
}
}
/* new context requested */
- nxdprintk("loc_nx_info(%d) = %p (new)\n", id, new);
+ vxdprintk(VXD_CBIT(nid, 0),
+ "loc_nx_info(%d) = %p (new)", id, new);
__hash_nx_info(get_nx_info(new));
nxi = new, new = NULL;
*err = 1;
/* exported stuff */
-
-
-void rcu_free_nx_info(void *obj)
-{
- struct nx_info *nxi = obj;
- int usecnt, refcnt;
-
- usecnt = atomic_read(&nxi->nx_usecnt);
- BUG_ON(usecnt < 0);
-
- refcnt = atomic_read(&nxi->nx_refcnt);
- BUG_ON(refcnt < 0);
-
- if (!usecnt)
- __dealloc_nx_info(nxi);
- else
- printk("!!! rcu didn't free\n");
-}
-
void unhash_nx_info(struct nx_info *nxi)
{
spin_lock(&nx_info_hash_lock);
/* locate_nx_info()
- * search for a nx_info and get() it
+ * search for a nx_info and get() it
* negative id means current */
struct nx_info *locate_nx_info(int id)
{
struct nx_info *nxi;
-
+
if (id < 0) {
nxi = get_nx_info(current->nx_info);
} else {
- rcu_read_lock();
+ spin_lock(&nx_info_hash_lock);
nxi = get_nx_info(__lookup_nx_info(id));
- rcu_read_unlock();
+ spin_unlock(&nx_info_hash_lock);
}
return nxi;
}
-/* nx_info_is_hashed()
+/* nid_is_hashed()
* verify that nid is still hashed */
-int nx_info_is_hashed(nid_t nid)
+int nid_is_hashed(nid_t nid)
{
int hashed;
- rcu_read_lock();
+ spin_lock(&nx_info_hash_lock);
hashed = (__lookup_nx_info(nid) != NULL);
- rcu_read_unlock();
+ spin_unlock(&nx_info_hash_lock);
return hashed;
}
-#ifdef CONFIG_VSERVER_LEGACY
+#ifdef CONFIG_VSERVER_LEGACYNET
struct nx_info *locate_or_create_nx_info(int id)
{
{
struct nx_info *new;
int err;
-
- nxdprintk("create_nx_info()\n");
+
+ vxdprintk(VXD_CBIT(nid, 5), "create_nx_info(%s)", "void");
if (!(new = __loc_nx_info(NX_DYNAMIC_ID, &err)))
return NULL;
return new;
#ifdef CONFIG_PROC_FS
-#define hlist_for_each_rcu(pos, head) \
- for (pos = (head)->first; pos && ({ prefetch(pos->next); 1;}); \
- pos = pos->next, ({ smp_read_barrier_depends(); 0;}))
-
int get_nid_list(int index, unsigned int *nids, int size)
{
int hindex, nr_nids = 0;
- rcu_read_lock();
for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
struct hlist_head *head = &nx_info_hash[hindex];
struct hlist_node *pos;
- hlist_for_each_rcu(pos, head) {
+ spin_lock(&nx_info_hash_lock);
+ hlist_for_each(pos, head) {
struct nx_info *nxi;
if (--index > 0)
continue;
nxi = hlist_entry(pos, struct nx_info, nx_hlist);
- nids[nr_nids] = nxi->nx_id;
- if (++nr_nids >= size)
+ nids[nr_nids] = nxi->nx_id;
+ if (++nr_nids >= size) {
+ spin_unlock(&nx_info_hash_lock);
goto out;
+ }
}
+ /* keep the lock time short */
+ spin_unlock(&nx_info_hash_lock);
}
out:
- rcu_read_unlock();
return nr_nids;
}
#endif
{
struct nx_info *old_nxi;
int ret = 0;
-
+
if (!p || !nxi)
BUG();
- nxdprintk("nx_migrate_task(%p,%p[#%d.%d.%d])\n",
+ vxdprintk(VXD_CBIT(nid, 5),
+ "nx_migrate_task(%p,%p[#%d.%d.%d])",
p, nxi, nxi->nx_id,
atomic_read(&nxi->nx_usecnt),
atomic_read(&nxi->nx_refcnt));
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
-static inline int __addr_in_nx_info(u32 addr, struct nx_info *nxi)
-{
- int i, nbip;
-
- nbip = nxi->nbipv4;
- for (i=0; i<nbip; i++)
- if (nxi->ipv4[i] == addr)
- return 1;
- return 0;
-}
int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
{
- if (nxi && ifa)
- return __addr_in_nx_info(ifa->ifa_address, nxi);
- return 1;
+ if (!nxi)
+ return 1;
+ if (!ifa)
+ return 0;
+ return addr_in_nx_info(nxi, ifa->ifa_address);
}
int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
ifap = &ifa->ifa_next) {
- if (__addr_in_nx_info(ifa->ifa_address, nxi))
+ if (addr_in_nx_info(nxi, ifa->ifa_address))
return 1;
}
return 0;
}
+/*
+ * check if address is covered by socket
+ *
+ * sk: the socket to check against
+ * addr: the address in question (must be != 0)
+ */
+static inline int __addr_in_socket(struct sock *sk, uint32_t addr)
+{
+ struct nx_info *nxi = sk->sk_nx_info;
+ uint32_t saddr = tcp_v4_rcv_saddr(sk);
+
+ vxdprintk(VXD_CBIT(net, 5),
+ "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx",
+ sk, VXD_QUAD(addr), nxi, VXD_QUAD(saddr), sk->sk_socket,
+ (sk->sk_socket?sk->sk_socket->flags:0));
+
+ if (saddr) {
+ /* direct address match */
+ return (saddr == addr);
+ } else if (nxi) {
+ /* match against nx_info */
+ return addr_in_nx_info(nxi, addr);
+ } else {
+ /* unrestricted any socket */
+ return 1;
+ }
+}
+
+int nx_addr_conflict(struct nx_info *nxi, uint32_t addr, struct sock *sk)
+{
+ vxdprintk(VXD_CBIT(net, 2),
+		"nx_addr_conflict(%p,%p) %d.%d.%d.%d",
+ nxi, sk, VXD_QUAD(addr));
+
+ if (addr) {
+ /* check real address */
+ return __addr_in_socket(sk, addr);
+ } else if (nxi) {
+ /* check against nx_info */
+ int i, n = nxi->nbipv4;
+
+ for (i=0; i<n; i++)
+ if (__addr_in_socket(sk, nxi->ipv4[i]))
+ return 1;
+ return 0;
+ } else {
+ /* check against any */
+ return 1;
+ }
+}
/* vserver syscall commands below here */
int vc_task_nid(uint32_t id, void __user *data)
{
- nid_t nid;
-
- if (id) {
- struct task_struct *tsk;
-
- if (!vx_check(0, VX_ADMIN|VX_WATCH))
- return -EPERM;
-
- read_lock(&tasklist_lock);
- tsk = find_task_by_pid(id);
- nid = (tsk) ? tsk->nid : -ESRCH;
- read_unlock(&tasklist_lock);
- }
- else
- nid = current->nid;
- return nid;
+ nid_t nid;
+
+ if (id) {
+ struct task_struct *tsk;
+
+ if (!vx_check(0, VX_ADMIN|VX_WATCH))
+ return -EPERM;
+
+ read_lock(&tasklist_lock);
+ tsk = find_task_by_real_pid(id);
+ nid = (tsk) ? tsk->nid : -ESRCH;
+ read_unlock(&tasklist_lock);
+ }
+ else
+ nid = current->nid;
+ return nid;
}
int vc_net_create(uint32_t nid, void __user *data)
{
- // int ret = -ENOMEM;
+ // int ret = -ENOMEM;
struct nx_info *new_nxi;
int ret;
int vc_net_migrate(uint32_t id, void __user *data)
{
struct nx_info *nxi;
-
+
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
#include <linux/module.h>
-EXPORT_SYMBOL_GPL(rcu_free_nx_info);
-EXPORT_SYMBOL_GPL(nx_info_hash_lock);
+EXPORT_SYMBOL_GPL(free_nx_info);
EXPORT_SYMBOL_GPL(unhash_nx_info);