X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=kernel%2Fvserver%2Fnetwork.c;h=1ceb1fcb806c616c4730e520aa51d698fd98e458;hb=34a75f0025b9cf803b6a88db032e6ad6950c9313;hp=e87c8b61779c61a43d7dba0b2b063939a0dd33a1;hpb=c7b5ebbddf7bcd3651947760f423e3783bbe6573;p=linux-2.6.git diff --git a/kernel/vserver/network.c b/kernel/vserver/network.c index e87c8b617..1ceb1fcb8 100644 --- a/kernel/vserver/network.c +++ b/kernel/vserver/network.c @@ -3,19 +3,18 @@ * * Virtual Server: Network Support * - * Copyright (C) 2003-2004 Herbert Pötzl + * Copyright (C) 2003-2005 Herbert Pötzl * * V0.01 broken out from vcontext V0.05 * V0.02 cleaned up implementation * V0.03 added equiv nx commands * V0.04 switch to RCU based hash + * V0.05 and back to locking again * */ -#include #include -#include -#include +#include #include #include @@ -40,15 +39,17 @@ static struct nx_info *__alloc_nx_info(nid_t nid) memset (new, 0, sizeof(struct nx_info)); new->nx_id = nid; - INIT_RCU_HEAD(&new->nx_rcu); INIT_HLIST_NODE(&new->nx_hlist); - atomic_set(&new->nx_refcnt, 0); atomic_set(&new->nx_usecnt, 0); + atomic_set(&new->nx_tasks, 0); + new->nx_state = 0; + + new->nx_flags = NXF_INIT_SET; /* rest of init goes here */ vxdprintk(VXD_CBIT(nid, 0), - "alloc_nx_info() = %p", new); + "alloc_nx_info(%d) = %p", nid, new); return new; } @@ -65,11 +66,34 @@ static void __dealloc_nx_info(struct nx_info *nxi) nxi->nx_id = -1; BUG_ON(atomic_read(&nxi->nx_usecnt)); - BUG_ON(atomic_read(&nxi->nx_refcnt)); + BUG_ON(atomic_read(&nxi->nx_tasks)); + nxi->nx_state |= NXS_RELEASED; kfree(nxi); } +static void __shutdown_nx_info(struct nx_info *nxi) +{ + nxi->nx_state |= NXS_SHUTDOWN; + vs_net_change(nxi, VSC_NETDOWN); +} + +/* exported stuff */ + +void free_nx_info(struct nx_info *nxi) +{ + /* context shutdown is mandatory */ + BUG_ON(nxi->nx_state != NXS_SHUTDOWN); + + /* context must not be hashed */ + BUG_ON(nxi->nx_state & NXS_HASHED); + + BUG_ON(atomic_read(&nxi->nx_usecnt)); + BUG_ON(atomic_read(&nxi->nx_tasks)); + + __dealloc_nx_info(nxi); +} + /* hash table for nx_info hash */ @@ -96,11 +120,16 @@ static inline void __hash_nx_info(struct nx_info *nxi) { struct hlist_head *head; + vxd_assert_lock(&nx_info_hash_lock); vxdprintk(VXD_CBIT(nid, 4), "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id); - get_nx_info(nxi); + + /* context must not be hashed */ + BUG_ON(nx_info_state(nxi, NXS_HASHED)); + + nxi->nx_state |= NXS_HASHED; head = &nx_info_hash[__hashval(nxi->nx_id)]; - hlist_add_head_rcu(&nxi->nx_hlist, head); + hlist_add_head(&nxi->nx_hlist, head); } /* __unhash_nx_info() @@ -110,32 +139,42 @@ static inline void __hash_nx_info(struct nx_info *nxi) static inline void __unhash_nx_info(struct nx_info *nxi) { + vxd_assert_lock(&nx_info_hash_lock); vxdprintk(VXD_CBIT(nid, 4), "__unhash_nx_info: %p[#%d]", nxi, nxi->nx_id); - hlist_del_rcu(&nxi->nx_hlist); - put_nx_info(nxi); + + /* context must be hashed */ + BUG_ON(!nx_info_state(nxi, NXS_HASHED)); + + nxi->nx_state &= ~NXS_HASHED; + hlist_del(&nxi->nx_hlist); } /* __lookup_nx_info() - * requires the rcu_read_lock() + * requires the hash_lock to be held * doesn't increment the nx_refcnt */ static inline struct nx_info *__lookup_nx_info(nid_t nid) { struct hlist_head *head = &nx_info_hash[__hashval(nid)]; struct hlist_node *pos; + struct nx_info *nxi; - hlist_for_each_rcu(pos, head) { - struct nx_info *nxi = - hlist_entry(pos, struct nx_info, nx_hlist); + vxd_assert_lock(&nx_info_hash_lock); + hlist_for_each(pos, head) { + nxi = hlist_entry(pos, struct nx_info, nx_hlist); - if (nxi->nx_id == nid) { - return nxi; - } + if (nxi->nx_id == nid) + goto found; } - return NULL; + nxi = NULL; +found: + vxdprintk(VXD_CBIT(nid, 0), + "__lookup_nx_info(#%u): %p[#%u]", + nid, nxi, nxi?nxi->nx_id:0); + return nxi; } @@ -149,6 +188,7 @@ static inline nid_t __nx_dynamic_id(void) static nid_t seq = MAX_N_CONTEXT; nid_t barrier = seq; + vxd_assert_lock(&nx_info_hash_lock); do { if (++seq > MAX_N_CONTEXT) seq = MIN_D_CONTEXT; @@ -161,22 +201,21 @@ static inline nid_t __nx_dynamic_id(void) return 0; } -/* __loc_nx_info() +/* __create_nx_info() - * locate or create the requested context - * get() it and if new hash it */ + * create the requested context + * get() and hash it */ -static struct nx_info * __loc_nx_info(int id, int *err) +static struct nx_info * __create_nx_info(int id) { struct nx_info *new, *nxi = NULL; - vxdprintk(VXD_CBIT(nid, 1), "loc_nx_info(%d)*", id); + vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id); - if (!(new = __alloc_nx_info(id))) { - *err = -ENOMEM; - return NULL; - } + if (!(new = __alloc_nx_info(id))) + return ERR_PTR(-ENOMEM); + /* required to make dynamic xids unique */ spin_lock(&nx_info_hash_lock); /* dynamic context requested */ @@ -184,33 +223,34 @@ static struct nx_info * __loc_nx_info(int id, int *err) id = __nx_dynamic_id(); if (!id) { printk(KERN_ERR "no dynamic context available.\n"); + nxi = ERR_PTR(-EAGAIN); goto out_unlock; } new->nx_id = id; } - /* existing context requested */ + /* static context requested */ else if ((nxi = __lookup_nx_info(id))) { - /* context in setup is not available */ - if (nxi->nx_flags & VXF_STATE_SETUP) { - vxdprintk(VXD_CBIT(nid, 0), - "loc_nx_info(%d) = %p (not available)", id, nxi); - nxi = NULL; - *err = -EBUSY; - } else { - vxdprintk(VXD_CBIT(nid, 0), - "loc_nx_info(%d) = %p (found)", id, nxi); - get_nx_info(nxi); - *err = 0; - } + vxdprintk(VXD_CBIT(nid, 0), + "create_nx_info(%d) = %p (already there)", id, nxi); + if (nx_info_flags(nxi, NXF_STATE_SETUP, 0)) + nxi = ERR_PTR(-EBUSY); + else + nxi = ERR_PTR(-EEXIST); + goto out_unlock; + } + /* dynamic nid creation blocker */ + else if (id >= MIN_D_CONTEXT) { + vxdprintk(VXD_CBIT(nid, 0), + "create_nx_info(%d) (dynamic rejected)", id); + nxi = ERR_PTR(-EINVAL); goto out_unlock; } - /* new context requested */ + /* new context */ vxdprintk(VXD_CBIT(nid, 0), - "loc_nx_info(%d) = %p (new)", id, new); + "create_nx_info(%d) = %p (new)", id, new); __hash_nx_info(get_nx_info(new)); nxi = new, new = NULL; - *err = 1; out_unlock: spin_unlock(&nx_info_hash_lock); @@ -224,91 +264,56 @@ out_unlock: /* exported stuff */ - - -void rcu_free_nx_info(struct rcu_head *head) -{ - struct nx_info *nxi = container_of(head, struct nx_info, nx_rcu); - int usecnt, refcnt; - - BUG_ON(!nxi || !head); - - usecnt = atomic_read(&nxi->nx_usecnt); - BUG_ON(usecnt < 0); - - refcnt = atomic_read(&nxi->nx_refcnt); - BUG_ON(refcnt < 0); - - vxdprintk(VXD_CBIT(nid, 3), - "rcu_free_nx_info(%p): uc=%d", nxi, usecnt); - if (!usecnt) - __dealloc_nx_info(nxi); - else - printk("!!! rcu didn't free\n"); -} - void unhash_nx_info(struct nx_info *nxi) { + __shutdown_nx_info(nxi); spin_lock(&nx_info_hash_lock); __unhash_nx_info(nxi); spin_unlock(&nx_info_hash_lock); } -/* locate_nx_info() +#ifdef CONFIG_VSERVER_LEGACYNET + +struct nx_info *create_nx_info(void) +{ + return __create_nx_info(NX_DYNAMIC_ID); +} + +#endif + +/* lookup_nx_info() * search for a nx_info and get() it * negative id means current */ -struct nx_info *locate_nx_info(int id) +struct nx_info *lookup_nx_info(int id) { - struct nx_info *nxi; + struct nx_info *nxi = NULL; if (id < 0) { nxi = get_nx_info(current->nx_info); - } else { - rcu_read_lock(); + } else if (id > 1) { + spin_lock(&nx_info_hash_lock); nxi = get_nx_info(__lookup_nx_info(id)); - rcu_read_unlock(); + spin_unlock(&nx_info_hash_lock); } return nxi; } -/* nx_info_is_hashed() +/* nid_is_hashed() * verify that nid is still hashed */ -int nx_info_is_hashed(nid_t nid) +int nid_is_hashed(nid_t nid) { int hashed; - rcu_read_lock(); + spin_lock(&nx_info_hash_lock); hashed = (__lookup_nx_info(nid) != NULL); - rcu_read_unlock(); + spin_unlock(&nx_info_hash_lock); return hashed; } -#ifdef CONFIG_VSERVER_LEGACY - -struct nx_info *locate_or_create_nx_info(int id) -{ - int err; - - return __loc_nx_info(id, &err); -} - -struct nx_info *create_nx_info(void) -{ - struct nx_info *new; - int err; - - vxdprintk(VXD_CBIT(nid, 5), "create_nx_info(%s)", "void"); - if (!(new = __loc_nx_info(NX_DYNAMIC_ID, &err))) - return NULL; - return new; -} - - -#endif #ifdef CONFIG_PROC_FS @@ -316,12 +321,12 @@ int get_nid_list(int index, unsigned int *nids, int size) { int hindex, nr_nids = 0; - rcu_read_lock(); for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) { struct hlist_head *head = &nx_info_hash[hindex]; struct hlist_node *pos; - hlist_for_each_rcu(pos, head) { + spin_lock(&nx_info_hash_lock); + hlist_for_each(pos, head) { struct nx_info *nxi; if (--index > 0) @@ -329,12 +334,15 @@ int get_nid_list(int index, unsigned int *nids, int size) nxi = hlist_entry(pos, struct nx_info, nx_hlist); nids[nr_nids] = nxi->nx_id; - if (++nr_nids >= size) + if (++nr_nids >= size) { + spin_unlock(&nx_info_hash_lock); goto out; + } } + /* keep the lock time short */ + spin_unlock(&nx_info_hash_lock); } out: - rcu_read_unlock(); return nr_nids; } #endif @@ -342,6 +350,7 @@ out: /* * migrate task to new network + * gets nxi, puts old_nxi on change */ int nx_migrate_task(struct task_struct *p, struct nx_info *nxi) @@ -356,58 +365,71 @@ int nx_migrate_task(struct task_struct *p, struct nx_info *nxi) "nx_migrate_task(%p,%p[#%d.%d.%d])", p, nxi, nxi->nx_id, atomic_read(&nxi->nx_usecnt), - atomic_read(&nxi->nx_refcnt)); + atomic_read(&nxi->nx_tasks)); + /* maybe disallow this completely? */ old_nxi = task_get_nx_info(p); if (old_nxi == nxi) goto out; task_lock(p); - /* should be handled in set_nx_info !! */ if (old_nxi) clr_nx_info(&p->nx_info); + claim_nx_info(nxi, p); set_nx_info(&p->nx_info, nxi); p->nid = nxi->nx_id; task_unlock(p); - /* obsoleted by clr/set */ - // put_nx_info(old_nxi); + vxdprintk(VXD_CBIT(nid, 5), + "moved task %p into nxi:%p[#%d]", + p, nxi, nxi->nx_id); + + if (old_nxi) + release_nx_info(old_nxi, p); out: put_nx_info(old_nxi); return ret; } +#ifdef CONFIG_INET + #include #include - int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi) { if (!nxi) return 1; if (!ifa) return 0; - return addr_in_nx_info(nxi, ifa->ifa_address); + return addr_in_nx_info(nxi, ifa->ifa_local); } int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi) { - struct in_device *in_dev = __in_dev_get(dev); - struct in_ifaddr **ifap = NULL; - struct in_ifaddr *ifa = NULL; + struct in_device *in_dev; + struct in_ifaddr **ifap; + struct in_ifaddr *ifa; + int ret = 0; if (!nxi) return 1; + + in_dev = in_dev_get(dev); if (!in_dev) - return 0; + goto out; for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ifap = &ifa->ifa_next) { - if (addr_in_nx_info(nxi, ifa->ifa_address)) - return 1; + if (addr_in_nx_info(nxi, ifa->ifa_local)) { + ret = 1; + break; + } } - return 0; + in_dev_put(in_dev); +out: + return ret; } /* @@ -419,7 +441,7 @@ int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi) static inline int __addr_in_socket(struct sock *sk, uint32_t addr) { struct nx_info *nxi = sk->sk_nx_info; - uint32_t saddr = tcp_v4_rcv_saddr(sk); + uint32_t saddr = inet_rcv_saddr(sk); vxdprintk(VXD_CBIT(net, 5), "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx", @@ -462,6 +484,30 @@ int nx_addr_conflict(struct nx_info *nxi, uint32_t addr, struct sock *sk) } } +#endif /* CONFIG_INET */ + +void nx_set_persistent(struct nx_info *nxi) +{ + get_nx_info(nxi); + claim_nx_info(nxi, current); +} + +void nx_clear_persistent(struct nx_info *nxi) +{ + vxdprintk(VXD_CBIT(nid, 6), + "nx_clear_persistent(%p[#%d])", nxi, nxi->nx_id); + + release_nx_info(nxi, current); + put_nx_info(nxi); +} + +void nx_update_persistent(struct nx_info *nxi) +{ + if (nx_info_flags(nxi, NXF_PERSISTENT, 0)) + nx_set_persistent(nxi); + else + nx_clear_persistent(nxi); +} /* vserver syscall commands below here */ @@ -486,7 +532,7 @@ int vc_task_nid(uint32_t id, void __user *data) read_unlock(&tasklist_lock); } else - nid = current->nid; + nid = nx_current_nid(); return nid; } @@ -501,7 +547,7 @@ int vc_nx_info(uint32_t id, void __user *data) if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) return -EPERM; - nxi = locate_nx_info(id); + nxi = lookup_nx_info(id); if (!nxi) return -ESRCH; @@ -518,30 +564,47 @@ int vc_nx_info(uint32_t id, void __user *data) int vc_net_create(uint32_t nid, void __user *data) { - // int ret = -ENOMEM; + struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET }; struct nx_info *new_nxi; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (data && copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; - if ((nid >= MIN_D_CONTEXT) && (nid != VX_DYNAMIC_ID)) + if ((nid > MAX_S_CONTEXT) && (nid != VX_DYNAMIC_ID)) return -EINVAL; - - if (nid < 1) + if (nid < 2) return -EINVAL; - new_nxi = __loc_nx_info(nid, &ret); - if (!new_nxi) - return ret; - if (!(new_nxi->nx_flags & VXF_STATE_SETUP)) { - ret = -EEXIST; - goto out_put; - } + new_nxi = __create_nx_info(nid); + if (IS_ERR(new_nxi)) + return PTR_ERR(new_nxi); + + /* initial flags */ + new_nxi->nx_flags = vc_data.flagword; - ret = new_nxi->nx_id; - nx_migrate_task(current, new_nxi); -out_put: + /* get a reference for persistent contexts */ + if ((vc_data.flagword & NXF_PERSISTENT)) + nx_set_persistent(new_nxi); + + ret = -ENOEXEC; + if (vs_net_change(new_nxi, VSC_NETUP)) + goto out_unhash; + ret = nx_migrate_task(current, new_nxi); + if (!ret) { + /* return context id on success */ + ret = new_nxi->nx_id; + goto out; + } +out_unhash: + /* prepare for context disposal */ + new_nxi->nx_state |= NXS_SHUTDOWN; + if ((vc_data.flagword & NXF_PERSISTENT)) + nx_clear_persistent(new_nxi); + __unhash_nx_info(new_nxi); +out: put_nx_info(new_nxi); return ret; } @@ -554,7 +617,7 @@ int vc_net_migrate(uint32_t id, void __user *data) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - nxi = locate_nx_info(id); + nxi = lookup_nx_info(id); if (!nxi) return -ESRCH; nx_migrate_task(current, nxi); @@ -562,45 +625,86 @@ int vc_net_migrate(uint32_t id, void __user *data) return 0; } -int vc_net_add(uint32_t id, void __user *data) +int vc_net_add(uint32_t nid, void __user *data) { + struct vcmd_net_addr_v0 vc_data; struct nx_info *nxi; - struct vcmd_net_nx_v0 vc_data; + int index, pos, ret = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user (&vc_data, data, sizeof(vc_data))) + if (data && copy_from_user (&vc_data, data, sizeof(vc_data))) return -EFAULT; - nxi = locate_nx_info(id); + switch (vc_data.type) { + case NXA_TYPE_IPV4: + if ((vc_data.count < 1) || (vc_data.count > 4)) + return -EINVAL; + break; + + default: + break; + } + + nxi = lookup_nx_info(nid); if (!nxi) return -ESRCH; - // add ip to net context here + switch (vc_data.type) { + case NXA_TYPE_IPV4: + index = 0; + while ((index < vc_data.count) && + ((pos = nxi->nbipv4) < NB_IPV4ROOT)) { + nxi->ipv4[pos] = vc_data.ip[index]; + nxi->mask[pos] = vc_data.mask[index]; + index++; + nxi->nbipv4++; + } + ret = index; + break; + + case NXA_TYPE_IPV4|NXA_MOD_BCAST: + nxi->v4_bcast = vc_data.ip[0]; + ret = 1; + break; + + default: + ret = -EINVAL; + break; + } + put_nx_info(nxi); - return 0; + return ret; } -int vc_net_remove(uint32_t id, void __user *data) +int vc_net_remove(uint32_t nid, void __user *data) { + struct vcmd_net_addr_v0 vc_data; struct nx_info *nxi; - struct vcmd_net_nx_v0 vc_data; + int ret = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user (&vc_data, data, sizeof(vc_data))) + if (data && copy_from_user (&vc_data, data, sizeof(vc_data))) return -EFAULT; - nxi = locate_nx_info(id); + nxi = lookup_nx_info(nid); if (!nxi) return -ESRCH; - // rem ip from net context here - put_nx_info(nxi); - return 0; -} + switch (vc_data.type) { + case NXA_TYPE_ANY: + nxi->nbipv4 = 0; + break; + default: + ret = -EINVAL; + break; + } + put_nx_info(nxi); + return ret; +} int vc_get_nflags(uint32_t id, void __user *data) { @@ -610,14 +714,14 @@ int vc_get_nflags(uint32_t id, void __user *data) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - nxi = locate_nx_info(id); + nxi = lookup_nx_info(id); if (!nxi) return -ESRCH; vc_data.flagword = nxi->nx_flags; /* special STATE flag handling */ - vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, IPF_ONE_TIME); + vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, NXF_ONE_TIME); put_nx_info(nxi); @@ -637,17 +741,19 @@ int vc_set_nflags(uint32_t id, void __user *data) if (copy_from_user (&vc_data, data, sizeof(vc_data))) return -EFAULT; - nxi = locate_nx_info(id); + nxi = lookup_nx_info(id); if (!nxi) return -ESRCH; /* special STATE flag handling */ - mask = vx_mask_mask(vc_data.mask, nxi->nx_flags, IPF_ONE_TIME); + mask = vx_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME); trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword); - // if (trigger & IPF_STATE_SETUP) nxi->nx_flags = vx_mask_flags(nxi->nx_flags, vc_data.flagword, mask); + if (trigger & NXF_PERSISTENT) + nx_update_persistent(nxi); + put_nx_info(nxi); return 0; } @@ -660,7 +766,7 @@ int vc_get_ncaps(uint32_t id, void __user *data) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - nxi = locate_nx_info(id); + nxi = lookup_nx_info(id); if (!nxi) return -ESRCH; @@ -683,7 +789,7 @@ int vc_set_ncaps(uint32_t id, void __user *data) if (copy_from_user (&vc_data, data, sizeof(vc_data))) return -EFAULT; - nxi = locate_nx_info(id); + nxi = lookup_nx_info(id); if (!nxi) return -ESRCH; @@ -696,7 +802,6 @@ int vc_set_ncaps(uint32_t id, void __user *data) #include -EXPORT_SYMBOL_GPL(rcu_free_nx_info); -EXPORT_SYMBOL_GPL(nx_info_hash_lock); +EXPORT_SYMBOL_GPL(free_nx_info); EXPORT_SYMBOL_GPL(unhash_nx_info);