X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=kernel%2Fvserver%2Fnetwork.c;h=4c3eb435a52135f4b88b62be9a63f9dfe9fc980a;hb=6a77f38946aaee1cd85eeec6cf4229b204c15071;hp=479a19b47af0b0595e5e41775138ee8da38f1442;hpb=b76fcd5f0c655b6e3e9bf534594357025421c66a;p=linux-2.6.git diff --git a/kernel/vserver/network.c b/kernel/vserver/network.c index 479a19b47..4c3eb435a 100644 --- a/kernel/vserver/network.c +++ b/kernel/vserver/network.c @@ -3,160 +3,216 @@ * * Virtual Server: Network Support * - * Copyright (C) 2003-2004 Herbert Pötzl + * Copyright (C) 2003-2005 Herbert Pötzl * * V0.01 broken out from vcontext V0.05 * V0.02 cleaned up implementation * V0.03 added equiv nx commands + * V0.04 switch to RCU based hash * */ #include #include -#include -#include +#include +#include +#include #include -LIST_HEAD(nx_infos); +/* __alloc_nx_info() -spinlock_t nxlist_lock - __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; + * allocate an initialized nx_info struct + * doesn't make it visible (hash) */ - -/* - * struct nx_info allocation and deallocation - */ - -static struct nx_info *alloc_nx_info(void) +static struct nx_info *__alloc_nx_info(nid_t nid) { struct nx_info *new = NULL; - - nxdprintk("alloc_nx_info()\n"); + + vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid); + /* would this benefit from a slab cache? */ new = kmalloc(sizeof(struct nx_info), GFP_KERNEL); if (!new) return 0; - + memset (new, 0, sizeof(struct nx_info)); + new->nx_id = nid; + INIT_RCU_HEAD(&new->nx_rcu); + INIT_HLIST_NODE(&new->nx_hlist); + atomic_set(&new->nx_refcnt, 0); + atomic_set(&new->nx_usecnt, 0); + /* rest of init goes here */ - - nxdprintk("alloc_nx_info() = %p\n", new); + + vxdprintk(VXD_CBIT(nid, 0), + "alloc_nx_info() = %p", new); return new; } -void free_nx_info(struct nx_info *nxi) +/* __dealloc_nx_info() + + * final disposal of nx_info */ + +static void __dealloc_nx_info(struct nx_info *nxi) { - nxdprintk("free_nx_info(%p)\n", nxi); + vxdprintk(VXD_CBIT(nid, 0), + "dealloc_nx_info(%p)", nxi); + + nxi->nx_hlist.next = LIST_POISON1; + nxi->nx_id = -1; + + BUG_ON(atomic_read(&nxi->nx_usecnt)); + BUG_ON(atomic_read(&nxi->nx_refcnt)); + kfree(nxi); } -struct nx_info *create_nx_info(void) +static inline int __free_nx_info(struct nx_info *nxi) { - struct nx_info *new; - static int gnid = 1; - - nxdprintk("create_nx_info()\n"); - if (!(new = alloc_nx_info())) - return 0; + int usecnt, refcnt; - spin_lock(&nxlist_lock); + BUG_ON(!nxi); - /* new ip info */ - atomic_set(&new->nx_refcount, 1); - new->nx_id = gnid++; - list_add(&new->nx_list, &nx_infos); + usecnt = atomic_read(&nxi->nx_usecnt); + BUG_ON(usecnt < 0); - spin_unlock(&nxlist_lock); - return new; + refcnt = atomic_read(&nxi->nx_refcnt); + BUG_ON(refcnt < 0); + + if (!usecnt) + __dealloc_nx_info(nxi); + return usecnt; } +/* exported stuff */ + +void free_nx_info(struct nx_info *nxi) +{ + /* context shutdown is mandatory */ + // BUG_ON(nxi->nx_state != NXS_SHUTDOWN); + + // BUG_ON(nxi->nx_state & NXS_HASHED); + + BUG_ON(__free_nx_info(nxi)); +} -/* - * struct nx_info search by id - * assumes nxlist_lock is held - */ -static __inline__ struct nx_info *__find_nx_info(int id) +/* hash table for nx_info hash */ + +#define NX_HASH_SIZE 13 + +struct hlist_head nx_info_hash[NX_HASH_SIZE]; + +static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED; + + +static inline unsigned int __hashval(nid_t nid) { - struct nx_info *nxi; + return (nid % NX_HASH_SIZE); +} - list_for_each_entry(nxi, &nx_infos, nx_list) - if (nxi->nx_id == id) - return nxi; - return 0; + + +/* __hash_nx_info() + + * add the nxi to the global hash table + * requires the hash_lock to be held */ + +static inline void __hash_nx_info(struct nx_info *nxi) +{ + struct hlist_head *head; + + vxdprintk(VXD_CBIT(nid, 4), + "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id); + get_nx_info(nxi); + head = &nx_info_hash[__hashval(nxi->nx_id)]; + hlist_add_head(&nxi->nx_hlist, head); } +/* __unhash_nx_info() -/* - * struct nx_info ref stuff - */ + * remove the nxi from the global hash table + * requires the hash_lock to be held */ -struct nx_info *find_nx_info(int id) +static inline void __unhash_nx_info(struct nx_info *nxi) { - struct nx_info *nxi; - - if (id < 0) { - nxi = current->nx_info; - get_nx_info(nxi); - } else { - spin_lock(&nxlist_lock); - if ((nxi = __find_nx_info(id))) - get_nx_info(nxi); - spin_unlock(&nxlist_lock); - } - return nxi; + vxd_assert_lock(&nx_info_hash_lock); + vxdprintk(VXD_CBIT(nid, 4), + "__unhash_nx_info: %p[#%d]", nxi, nxi->nx_id); + hlist_del(&nxi->nx_hlist); + put_nx_info(nxi); } -/* - * verify that id is a valid nid - */ -int nx_info_id_valid(int id) +/* __lookup_nx_info() + + * requires the hash_lock to be held + * doesn't increment the nx_refcnt */ + +static inline struct nx_info *__lookup_nx_info(nid_t nid) { - int valid; - - spin_lock(&nxlist_lock); - valid = (__find_nx_info(id) != NULL); - spin_unlock(&nxlist_lock); - return valid; + struct hlist_head *head = &nx_info_hash[__hashval(nid)]; + struct hlist_node *pos; + + vxd_assert_lock(&nx_info_hash_lock); + hlist_for_each(pos, head) { + struct nx_info *nxi = + hlist_entry(pos, struct nx_info, nx_hlist); + + if (nxi->nx_id == nid) { + return nxi; + } + } + return NULL; } -/* - * dynamic context id ... - */ +/* __nx_dynamic_id() + + * find unused dynamic nid + * requires the hash_lock to be held */ -static __inline__ nid_t __nx_dynamic_id(void) +static inline nid_t __nx_dynamic_id(void) { static nid_t seq = MAX_N_CONTEXT; nid_t barrier = seq; - + + vxd_assert_lock(&nx_info_hash_lock); do { if (++seq > MAX_N_CONTEXT) seq = MIN_D_CONTEXT; - if (!__find_nx_info(seq)) + if (!__lookup_nx_info(seq)) { + vxdprintk(VXD_CBIT(nid, 4), + "__nx_dynamic_id: [#%d]", seq); return seq; + } } while (barrier != seq); return 0; } -static struct nx_info * __foc_nx_info(int id, int *err) +/* __loc_nx_info() + + * locate or create the requested context + * get() it and if new hash it */ + +static struct nx_info * __loc_nx_info(int id, int *err) { struct nx_info *new, *nxi = NULL; - - nxdprintk("foc_nx_info(%d)\n", id); - // if (!(new = alloc_nx_info(id))) { - if (!(new = alloc_nx_info())) { + + vxdprintk(VXD_CBIT(nid, 1), "loc_nx_info(%d)*", id); + + if (!(new = __alloc_nx_info(id))) { *err = -ENOMEM; return NULL; } - spin_lock(&nxlist_lock); + /* required to make dynamic xids unique */ + spin_lock(&nx_info_hash_lock); /* dynamic context requested */ - if (id == IP_DYNAMIC_ID) { + if (id == NX_DYNAMIC_ID) { id = __nx_dynamic_id(); if (!id) { printk(KERN_ERR "no dynamic context available.\n"); @@ -165,14 +221,16 @@ static struct nx_info * __foc_nx_info(int id, int *err) new->nx_id = id; } /* existing context requested */ - else if ((nxi = __find_nx_info(id))) { + else if ((nxi = __lookup_nx_info(id))) { /* context in setup is not available */ if (nxi->nx_flags & VXF_STATE_SETUP) { - nxdprintk("foc_nx_info(%d) = %p (not available)\n", id, nxi); + vxdprintk(VXD_CBIT(nid, 0), + "loc_nx_info(%d) = %p (not available)", id, nxi); nxi = NULL; *err = -EBUSY; } else { - nxdprintk("foc_nx_info(%d) = %p (found)\n", id, nxi); + vxdprintk(VXD_CBIT(nid, 0), + "loc_nx_info(%d) = %p (found)", id, nxi); get_nx_info(nxi); *err = 0; } @@ -180,50 +238,152 @@ static struct nx_info * __foc_nx_info(int id, int *err) } /* new context requested */ - nxdprintk("foc_nx_info(%d) = %p (new)\n", id, new); - atomic_set(&new->nx_refcount, 1); - list_add(&new->nx_list, &nx_infos); + vxdprintk(VXD_CBIT(nid, 0), + "loc_nx_info(%d) = %p (new)", id, new); + __hash_nx_info(get_nx_info(new)); nxi = new, new = NULL; *err = 1; out_unlock: - spin_unlock(&nxlist_lock); + spin_unlock(&nx_info_hash_lock); if (new) - free_nx_info(new); + __dealloc_nx_info(new); + return nxi; +} + + + +/* exported stuff */ + + +void unhash_nx_info(struct nx_info *nxi) +{ + spin_lock(&nx_info_hash_lock); + __unhash_nx_info(nxi); + spin_unlock(&nx_info_hash_lock); +} + +/* locate_nx_info() + + * search for a nx_info and get() it + * negative id means current */ + +struct nx_info *locate_nx_info(int id) +{ + struct nx_info *nxi; + + if (id < 0) { + nxi = get_nx_info(current->nx_info); + } else { + spin_lock(&nx_info_hash_lock); + nxi = get_nx_info(__lookup_nx_info(id)); + spin_unlock(&nx_info_hash_lock); + } return nxi; } +/* nid_is_hashed() -struct nx_info *find_or_create_nx_info(int id) + * verify that nid is still hashed */ + +int nid_is_hashed(nid_t nid) { + int hashed; + + spin_lock(&nx_info_hash_lock); + hashed = (__lookup_nx_info(nid) != NULL); + spin_unlock(&nx_info_hash_lock); + return hashed; +} + +#ifdef CONFIG_VSERVER_LEGACYNET + +struct nx_info *locate_or_create_nx_info(int id) +{ + int err; + + return __loc_nx_info(id, &err); +} + +struct nx_info *create_nx_info(void) +{ + struct nx_info *new; int err; - return __foc_nx_info(id, &err); + vxdprintk(VXD_CBIT(nid, 5), "create_nx_info(%s)", "void"); + if (!(new = __loc_nx_info(NX_DYNAMIC_ID, &err))) + return NULL; + return new; } + +#endif + +#ifdef CONFIG_PROC_FS + +int get_nid_list(int index, unsigned int *nids, int size) +{ + int hindex, nr_nids = 0; + + for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) { + struct hlist_head *head = &nx_info_hash[hindex]; + struct hlist_node *pos; + + spin_lock(&nx_info_hash_lock); + hlist_for_each(pos, head) { + struct nx_info *nxi; + + if (--index > 0) + continue; + + nxi = hlist_entry(pos, struct nx_info, nx_hlist); + nids[nr_nids] = nxi->nx_id; + if (++nr_nids >= size) { + spin_unlock(&nx_info_hash_lock); + goto out; + } + } + /* keep the lock time short */ + spin_unlock(&nx_info_hash_lock); + } +out: + return nr_nids; +} +#endif + + /* * migrate task to new network */ int nx_migrate_task(struct task_struct *p, struct nx_info *nxi) { - struct nx_info *old_nxi = task_get_nx_info(p); + struct nx_info *old_nxi; int ret = 0; - + if (!p || !nxi) BUG(); - nxdprintk("nx_migrate_task(%p,%p[#%d.%d)\n", p, nxi, - nxi->nx_id, atomic_read(&nxi->nx_refcount)); + vxdprintk(VXD_CBIT(nid, 5), + "nx_migrate_task(%p,%p[#%d.%d.%d])", + p, nxi, nxi->nx_id, + atomic_read(&nxi->nx_usecnt), + atomic_read(&nxi->nx_refcnt)); + + old_nxi = task_get_nx_info(p); if (old_nxi == nxi) goto out; task_lock(p); + /* should be handled in set_nx_info !! */ + if (old_nxi) + clr_nx_info(&p->nx_info); set_nx_info(&p->nx_info, nxi); p->nid = nxi->nx_id; task_unlock(p); - put_nx_info(old_nxi); + /* obsoleted by clr/set */ + // put_nx_info(old_nxi); out: put_nx_info(old_nxi); return ret; @@ -233,23 +393,14 @@ out: #include #include -static inline int __addr_in_nx_info(u32 addr, struct nx_info *nxi) -{ - int i, nbip; - - nbip = nxi->nbipv4; - for (i=0; iipv4[i] == addr) - return 1; - return 0; -} int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi) { if (!nxi) return 1; - - return __addr_in_nx_info(ifa->ifa_address, nxi); + if (!ifa) + return 0; + return addr_in_nx_info(nxi, ifa->ifa_address); } int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi) @@ -265,13 +416,63 @@ int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi) for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ifap = &ifa->ifa_next) { - if (__addr_in_nx_info(ifa->ifa_address, nxi)) + if (addr_in_nx_info(nxi, ifa->ifa_address)) return 1; } return 0; } +/* + * check if address is covered by socket + * + * sk: the socket to check against + * addr: the address in question (must be != 0) + */ +static inline int __addr_in_socket(struct sock *sk, uint32_t addr) +{ + struct nx_info *nxi = sk->sk_nx_info; + uint32_t saddr = tcp_v4_rcv_saddr(sk); + + vxdprintk(VXD_CBIT(net, 5), + "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx", + sk, VXD_QUAD(addr), nxi, VXD_QUAD(saddr), sk->sk_socket, + (sk->sk_socket?sk->sk_socket->flags:0)); + + if (saddr) { + /* direct address match */ + return (saddr == addr); + } else if (nxi) { + /* match against nx_info */ + return addr_in_nx_info(nxi, addr); + } else { + /* unrestricted any socket */ + return 1; + } +} + +int nx_addr_conflict(struct nx_info *nxi, uint32_t addr, struct sock *sk) +{ + vxdprintk(VXD_CBIT(net, 2), + "nx_addr_conflict(%p,%p) %d.%d,%d.%d", + nxi, sk, VXD_QUAD(addr)); + + if (addr) { + /* check real address */ + return __addr_in_socket(sk, addr); + } else if (nxi) { + /* check against nx_info */ + int i, n = nxi->nbipv4; + + for (i=0; iipv4[i])) + return 1; + return 0; + } else { + /* check against any */ + return 1; + } +} /* vserver syscall commands below here */ @@ -283,22 +484,22 @@ int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi) int vc_task_nid(uint32_t id, void __user *data) { - nid_t nid; - - if (id) { - struct task_struct *tsk; - - if (!vx_check(0, VX_ADMIN|VX_WATCH)) - return -EPERM; - - read_lock(&tasklist_lock); - tsk = find_task_by_pid(id); - nid = (tsk) ? tsk->nid : -ESRCH; - read_unlock(&tasklist_lock); - } - else - nid = current->nid; - return nid; + nid_t nid; + + if (id) { + struct task_struct *tsk; + + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return -EPERM; + + read_lock(&tasklist_lock); + tsk = find_task_by_real_pid(id); + nid = (tsk) ? tsk->nid : -ESRCH; + read_unlock(&tasklist_lock); + } + else + nid = current->nid; + return nid; } @@ -312,7 +513,7 @@ int vc_nx_info(uint32_t id, void __user *data) if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) return -EPERM; - nxi = find_nx_info(id); + nxi = locate_nx_info(id); if (!nxi) return -ESRCH; @@ -329,7 +530,7 @@ int vc_nx_info(uint32_t id, void __user *data) int vc_net_create(uint32_t nid, void __user *data) { - // int ret = -ENOMEM; + // int ret = -ENOMEM; struct nx_info *new_nxi; int ret; @@ -342,7 +543,7 @@ int vc_net_create(uint32_t nid, void __user *data) if (nid < 1) return -EINVAL; - new_nxi = __foc_nx_info(nid, &ret); + new_nxi = __loc_nx_info(nid, &ret); if (!new_nxi) return ret; if (!(new_nxi->nx_flags & VXF_STATE_SETUP)) { @@ -361,11 +562,11 @@ out_put: int vc_net_migrate(uint32_t id, void __user *data) { struct nx_info *nxi; - + if (!capable(CAP_SYS_ADMIN)) return -EPERM; - nxi = find_nx_info(id); + nxi = locate_nx_info(id); if (!nxi) return -ESRCH; nx_migrate_task(current, nxi); @@ -383,7 +584,7 @@ int vc_net_add(uint32_t id, void __user *data) if (copy_from_user (&vc_data, data, sizeof(vc_data))) return -EFAULT; - nxi = find_nx_info(id); + nxi = locate_nx_info(id); if (!nxi) return -ESRCH; @@ -402,7 +603,7 @@ int vc_net_remove(uint32_t id, void __user *data) if (copy_from_user (&vc_data, data, sizeof(vc_data))) return -EFAULT; - nxi = find_nx_info(id); + nxi = locate_nx_info(id); if (!nxi) return -ESRCH; @@ -421,13 +622,12 @@ int vc_get_nflags(uint32_t id, void __user *data) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - nxi = find_nx_info(id); + nxi = locate_nx_info(id); if (!nxi) return -ESRCH; vc_data.flagword = nxi->nx_flags; - // vc_data.mask = ~0UL; /* special STATE flag handling */ vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, IPF_ONE_TIME); @@ -449,7 +649,7 @@ int vc_set_nflags(uint32_t id, void __user *data) if (copy_from_user (&vc_data, data, sizeof(vc_data))) return -EFAULT; - nxi = find_nx_info(id); + nxi = locate_nx_info(id); if (!nxi) return -ESRCH; @@ -472,7 +672,7 @@ int vc_get_ncaps(uint32_t id, void __user *data) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - nxi = find_nx_info(id); + nxi = locate_nx_info(id); if (!nxi) return -ESRCH; @@ -495,7 +695,7 @@ int vc_set_ncaps(uint32_t id, void __user *data) if (copy_from_user (&vc_data, data, sizeof(vc_data))) return -EFAULT; - nxi = find_nx_info(id); + nxi = locate_nx_info(id); if (!nxi) return -ESRCH; @@ -509,5 +709,5 @@ int vc_set_ncaps(uint32_t id, void __user *data) #include EXPORT_SYMBOL_GPL(free_nx_info); -EXPORT_SYMBOL_GPL(nxlist_lock); +EXPORT_SYMBOL_GPL(unhash_nx_info);