2 * linux/kernel/vserver/network.c
4 * Virtual Server: Network Support
6 * Copyright (C) 2003-2004 Herbert Pötzl
8 * V0.01 broken out from vcontext V0.05
9 * V0.02 cleaned up implementation
10 * V0.03 added equiv nx commands
11 * V0.04 switch to RCU based hash
15 #include <linux/config.h>
16 #include <linux/slab.h>
17 #include <linux/vserver.h>
18 #include <linux/vs_base.h>
19 #include <linux/rcupdate.h>
22 #include <asm/errno.h>
/* __alloc_nx_info()
 * Allocate and zero-initialize a new nx_info for the given network id;
 * the caller is responsible for hashing/publishing it.
 * NOTE(review): the original line numbering jumps here — the opening brace,
 * the kmalloc() failure check and the trailing "return new;" are not visible
 * in this chunk; confirm against the full source before changing anything. */
27 * allocate an initialized nx_info struct
28 * doesn't make it visible (hash) */
30 static struct nx_info *__alloc_nx_info(nid_t nid)
32 struct nx_info *new = NULL;
34 vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);
36 /* would this benefit from a slab cache? */
37 new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
/* zero the whole struct so every field starts in a known state */
41 memset (new, 0, sizeof(struct nx_info));
43 INIT_RCU_HEAD(&new->nx_rcu);
44 INIT_HLIST_NODE(&new->nx_hlist);
/* fresh object: no hash reference and no users yet */
45 atomic_set(&new->nx_refcnt, 0);
46 atomic_set(&new->nx_usecnt, 0);
48 /* rest of init goes here */
50 vxdprintk(VXD_CBIT(nid, 0),
51 "alloc_nx_info() = %p", new);
/* Final disposal of an nx_info: must only run once both the use count
 * and the reference count have dropped to zero (enforced by the BUG_ONs).
 * NOTE(review): the actual kfree() is not visible in this chunk — the
 * original numbering jumps past it; confirm in the full source. */
55 /* __dealloc_nx_info()
57 * final disposal of nx_info */
59 static void __dealloc_nx_info(struct nx_info *nxi)
61 vxdprintk(VXD_CBIT(nid, 0),
62 "dealloc_nx_info(%p)", nxi);
/* poison the hash linkage so any stale use crashes loudly */
64 nxi->nx_hlist.next = LIST_POISON1;
67 BUG_ON(atomic_read(&nxi->nx_usecnt));
68 BUG_ON(atomic_read(&nxi->nx_refcnt));
/* Global nid -> nx_info hash table; writers serialize on
 * nx_info_hash_lock, readers walk the buckets under rcu_read_lock()
 * (see __lookup_nx_info below). */
74 /* hash table for nx_info hash */
76 #define NX_HASH_SIZE 13
78 struct hlist_head nx_info_hash[NX_HASH_SIZE];
80 static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED;
/* Map a network id to a bucket index: plain modulo over the (prime)
 * table size NX_HASH_SIZE. */
83 static inline unsigned int __hashval(nid_t nid)
85 return (nid % NX_HASH_SIZE);
/* Insert nxi into the global hash bucket for its nx_id.
 * Caller must hold nx_info_hash_lock; insertion is RCU-safe for
 * concurrent lockless readers. */
92 * add the nxi to the global hash table
93 * requires the hash_lock to be held */
95 static inline void __hash_nx_info(struct nx_info *nxi)
97 struct hlist_head *head;
99 vxdprintk(VXD_CBIT(nid, 4),
100 "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);
102 head = &nx_info_hash[__hashval(nxi->nx_id)];
103 hlist_add_head_rcu(&nxi->nx_hlist, head);
/* Remove nxi from the global hash table; caller must hold
 * nx_info_hash_lock. hlist_del_rcu() leaves the node safe for
 * concurrent RCU readers still traversing the bucket. */
106 /* __unhash_nx_info()
108 * remove the nxi from the global hash table
109 * requires the hash_lock to be held */
111 static inline void __unhash_nx_info(struct nx_info *nxi)
113 vxdprintk(VXD_CBIT(nid, 4),
114 "__unhash_nx_info: %p[#%d]", nxi, nxi->nx_id);
115 hlist_del_rcu(&nxi->nx_hlist);
/* Look up a nid in the hash under rcu_read_lock(); returns the entry
 * without taking a reference (caller must get() it if needed).
 * NOTE(review): the return statements are not visible in this chunk —
 * presumably nxi on match and NULL otherwise; confirm in full source. */
120 /* __lookup_nx_info()
122 * requires the rcu_read_lock()
123 * doesn't increment the nx_refcnt */
125 static inline struct nx_info *__lookup_nx_info(nid_t nid)
127 struct hlist_head *head = &nx_info_hash[__hashval(nid)];
128 struct hlist_node *pos;
130 hlist_for_each_rcu(pos, head) {
131 struct nx_info *nxi =
132 hlist_entry(pos, struct nx_info, nx_hlist);
134 if (nxi->nx_id == nid) {
/* Find an unused dynamic network id by scanning from a static cursor;
 * caller must hold the hash_lock (the static 'seq' is unprotected
 * otherwise).
 * NOTE(review): the declaration of 'barrier' and the wrap-around
 * assignment after MAX_N_CONTEXT are not visible in this chunk; the
 * loop terminates once the cursor returns to 'barrier'. */
144 * find unused dynamic nid
145 * requires the hash_lock to be held */
147 static inline nid_t __nx_dynamic_id(void)
149 static nid_t seq = MAX_N_CONTEXT;
153 if (++seq > MAX_N_CONTEXT)
155 if (!__lookup_nx_info(seq)) {
156 vxdprintk(VXD_CBIT(nid, 4),
157 "__nx_dynamic_id: [#%d]", seq);
160 } while (barrier != seq);
/* Locate an existing context for 'id' or create and hash a new one;
 * the returned nx_info is get()ed and *err carries the failure reason.
 * A candidate struct is allocated up front so no allocation happens
 * while holding nx_info_hash_lock; the unused candidate is disposed of
 * after unlock.
 * NOTE(review): several lines (braces, error assignments, returns and
 * the guard around the final dealloc) are not visible in this chunk —
 * the original numbering jumps repeatedly. */
166 * locate or create the requested context
167 * get() it and if new hash it */
169 static struct nx_info * __loc_nx_info(int id, int *err)
171 struct nx_info *new, *nxi = NULL;
173 vxdprintk(VXD_CBIT(nid, 1), "loc_nx_info(%d)*", id);
/* pre-allocate before taking the spinlock (kmalloc may sleep) */
175 if (!(new = __alloc_nx_info(id))) {
180 spin_lock(&nx_info_hash_lock);
182 /* dynamic context requested */
183 if (id == NX_DYNAMIC_ID) {
184 id = __nx_dynamic_id();
186 printk(KERN_ERR "no dynamic context available.\n");
191 /* existing context requested */
192 else if ((nxi = __lookup_nx_info(id))) {
193 /* context in setup is not available */
194 if (nxi->nx_flags & VXF_STATE_SETUP) {
195 vxdprintk(VXD_CBIT(nid, 0),
196 "loc_nx_info(%d) = %p (not available)", id, nxi);
200 vxdprintk(VXD_CBIT(nid, 0),
201 "loc_nx_info(%d) = %p (found)", id, nxi);
208 /* new context requested */
209 vxdprintk(VXD_CBIT(nid, 0),
210 "loc_nx_info(%d) = %p (new)", id, new);
/* publish the new context with a reference already held */
211 __hash_nx_info(get_nx_info(new));
212 nxi = new, new = NULL;
216 spin_unlock(&nx_info_hash_lock);
/* drop the unused pre-allocated struct; presumably guarded by
 * "if (new)" on a line not visible here — new is NULL on the
 * new-context path above */
218 __dealloc_nx_info(new);
/* RCU callback: dispose of an nx_info after the grace period, but only
 * if the use/ref counts confirm nobody still holds it.
 * NOTE(review): the declarations of 'usecnt'/'refcnt' and the
 * conditional selecting between __dealloc_nx_info() and the warning
 * printk are not visible in this chunk. */
229 void rcu_free_nx_info(struct rcu_head *head)
231 struct nx_info *nxi = container_of(head, struct nx_info, nx_rcu);
/* NOTE(review): nxi is derived from head above, so this is effectively
 * a sanity assertion rather than a usable NULL guard */
234 BUG_ON(!nxi || !head);
236 usecnt = atomic_read(&nxi->nx_usecnt);
239 refcnt = atomic_read(&nxi->nx_refcnt);
242 vxdprintk(VXD_CBIT(nid, 3),
243 "rcu_free_nx_info(%p): uc=%d", nxi, usecnt);
245 __dealloc_nx_info(nxi);
247 printk("!!! rcu didn't free\n");
/* Remove a context from the global hash, serialized against other hash
 * writers by nx_info_hash_lock. */
250 void unhash_nx_info(struct nx_info *nxi)
252 spin_lock(&nx_info_hash_lock);
253 __unhash_nx_info(nxi);
254 spin_unlock(&nx_info_hash_lock);
/* Look up a context by id and return it get()ed; a negative id means
 * the current task's context. NOTE(review): the rcu_read_lock/unlock
 * pairing around the hash lookup is not visible in this chunk. */
259 * search for a nx_info and get() it
260 * negative id means current */
262 struct nx_info *locate_nx_info(int id)
267 nxi = get_nx_info(current->nx_info);
270 nxi = get_nx_info(__lookup_nx_info(id));
/* Check whether a nid is currently present in the global hash.
 * NOTE(review): the rcu_read_lock/unlock guards and the return are not
 * visible in this chunk. */
276 /* nx_info_is_hashed()
278 * verify that nid is still hashed */
280 int nx_info_is_hashed(nid_t nid)
285 hashed = (__lookup_nx_info(nid) != NULL);
290 #ifdef CONFIG_VSERVER_LEGACY
/* Legacy API: find or create the context for 'id'; the local error
 * code ('err', declaration not visible here) is discarded. */
292 struct nx_info *locate_or_create_nx_info(int id)
296 return __loc_nx_info(id, &err);
/* Legacy API: create a fresh context with a dynamically assigned nid.
 * The failure return and the rest of the body are not visible here. */
299 struct nx_info *create_nx_info(void)
304 vxdprintk(VXD_CBIT(nid, 5), "create_nx_info(%s)", "void")
305 if (!(new = __loc_nx_info(NX_DYNAMIC_ID, &err)))
313 #ifdef CONFIG_PROC_FS
/* Fill 'nids' with up to 'size' hashed network ids for /proc listing,
 * walking every hash bucket. Presumably skips the first 'index'
 * entries and returns nr_nids — the skip logic, RCU guards and return
 * are not visible in this chunk (original numbering jumps). */
315 int get_nid_list(int index, unsigned int *nids, int size)
317 int hindex, nr_nids = 0;
320 for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
321 struct hlist_head *head = &nx_info_hash[hindex];
322 struct hlist_node *pos;
324 hlist_for_each_rcu(pos, head) {
330 nxi = hlist_entry(pos, struct nx_info, nx_hlist);
331 nids[nr_nids] = nxi->nx_id;
/* stop once the caller's buffer is full */
332 if (++nr_nids >= size)
/* Move task 'p' into network context 'nxi': swap the task's nx_info
 * pointer via clr/set_nx_info and drop the reference on the old one.
 * NOTE(review): argument checks, task locking and the return value are
 * not visible in this chunk (original numbering jumps). */
344 * migrate task to new network
347 int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
349 struct nx_info *old_nxi;
355 vxdprintk(VXD_CBIT(nid, 5),
356 "nx_migrate_task(%p,%p[#%d.%d.%d])",
358 atomic_read(&nxi->nx_usecnt),
359 atomic_read(&nxi->nx_refcnt));
361 old_nxi = task_get_nx_info(p);
366 /* should be handled in set_nx_info !! */
368 clr_nx_info(&p->nx_info);
369 set_nx_info(&p->nx_info, nxi);
373 /* obsoleted by clr/set */
374 // put_nx_info(old_nxi);
/* release the reference taken by task_get_nx_info() above */
376 put_nx_info(old_nxi);
381 #include <linux/netdevice.h>
382 #include <linux/inetdevice.h>
/* Check whether interface address 'ifa' is visible in context 'nxi';
 * delegates to addr_in_nx_info(). NOTE(review): any NULL guards on
 * ifa/nxi are not visible in this chunk. */
385 int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
391 return addr_in_nx_info(nxi, ifa->ifa_address);
/* Check whether any IPv4 address configured on 'dev' is visible in
 * context 'nxi' by scanning the device's ifa_list.
 * NOTE(review): the in_dev NULL guard and the return statements are
 * not visible in this chunk. */
394 int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
396 struct in_device *in_dev = __in_dev_get(dev);
397 struct in_ifaddr **ifap = NULL;
398 struct in_ifaddr *ifa = NULL;
405 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
406 ifap = &ifa->ifa_next) {
407 if (addr_in_nx_info(nxi, ifa->ifa_address))
/* Test whether 'addr' is covered by socket 'sk': either the socket is
 * bound to exactly that address, or the address belongs to the
 * socket's network context. NOTE(review): the branch conditions
 * selecting between the three return paths are not visible in this
 * chunk (original numbering jumps). */
414 * check if address is covered by socket
416 * sk: the socket to check against
417 * addr: the address in question (must be != 0)
419 static inline int __addr_in_socket(struct sock *sk, uint32_t addr)
421 struct nx_info *nxi = sk->sk_nx_info;
422 uint32_t saddr = tcp_v4_rcv_saddr(sk);
424 vxdprintk(VXD_CBIT(net, 5),
425 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx",
426 sk, VXD_QUAD(addr), nxi, VXD_QUAD(saddr), sk->sk_socket,
427 (sk->sk_socket?sk->sk_socket->flags:0));
430 /* direct address match */
431 return (saddr == addr);
433 /* match against nx_info */
434 return addr_in_nx_info(nxi, addr);
436 /* unrestricted any socket */
/* Decide whether 'addr' (or, for a wildcard, any of the context's
 * addresses) conflicts with socket 'sk'.
 * NOTE(review): the conditional structure between the branches, and
 * the loop around the ipv4[] scan, are not visible in this chunk.
 * NOTE(review): debug format "%d.%d,%d.%d" looks like a typo for
 * "%d.%d.%d.%d" (VXD_QUAD expands to four values) — cosmetic only. */
442 int nx_addr_conflict(struct nx_info *nxi, uint32_t addr, struct sock *sk)
444 vxdprintk(VXD_CBIT(net, 2),
445 "nx_addr_conflict(%p,%p) %d.%d,%d.%d",
446 nxi, sk, VXD_QUAD(addr));
449 /* check real address */
450 return __addr_in_socket(sk, addr);
452 /* check against nx_info */
/* wildcard: test every address assigned to the context */
453 int i, n = nxi->nbipv4;
456 if (__addr_in_socket(sk, nxi->ipv4[i]))
460 /* check against any */
466 /* vserver syscall commands below here */
468 /* task nid and nx_info functions */
470 #include <asm/uaccess.h>
/* vserver syscall: return the network id of the task identified by
 * pid 'id'; requires VX_ADMIN|VX_WATCH context privileges.
 * NOTE(review): the 'nid' declaration, any id==0-means-current
 * handling, and the return are not visible in this chunk. */
473 int vc_task_nid(uint32_t id, void __user *data)
478 struct task_struct *tsk;
480 if (!vx_check(0, VX_ADMIN|VX_WATCH))
/* tasklist_lock keeps the task from disappearing during lookup */
483 read_lock(&tasklist_lock);
484 tsk = find_task_by_real_pid(id);
485 nid = (tsk) ? tsk->nid : -ESRCH;
486 read_unlock(&tasklist_lock);
/* vserver syscall: copy context 'id''s info (currently just the nid)
 * to userspace; requires both CAP_SYS_ADMIN and CAP_SYS_RESOURCE.
 * NOTE(review): the put_nx_info() release and the returns are not
 * visible in this chunk. */
494 int vc_nx_info(uint32_t id, void __user *data)
497 struct vcmd_nx_info_v0 vc_data;
499 if (!vx_check(0, VX_ADMIN))
501 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
504 nxi = locate_nx_info(id);
508 vc_data.nid = nxi->nx_id;
511 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
517 /* network functions */
/* vserver syscall: create network context 'nid' and migrate the
 * calling task into it; a context that already exists and has left
 * setup cannot be created again. Error paths and returns are not
 * visible in this chunk. */
519 int vc_net_create(uint32_t nid, void __user *data)
521 // int ret = -ENOMEM;
522 struct nx_info *new_nxi;
525 if (!capable(CAP_SYS_ADMIN))
/* NOTE(review): this compares a *network* id against VX_DYNAMIC_ID
 * (a context-id constant); NX_DYNAMIC_ID — used elsewhere in this
 * file — looks intended. Confirm against the full source. */
528 if ((nid >= MIN_D_CONTEXT) && (nid != VX_DYNAMIC_ID))
534 new_nxi = __loc_nx_info(nid, &ret);
/* an existing, fully set-up context cannot be "created" again */
537 if (!(new_nxi->nx_flags & VXF_STATE_SETUP)) {
542 ret = new_nxi->nx_id;
543 nx_migrate_task(current, new_nxi);
545 put_nx_info(new_nxi);
/* vserver syscall: migrate the calling task into network context 'id'.
 * NOTE(review): the error handling, put_nx_info() release and return
 * are not visible in this chunk. */
550 int vc_net_migrate(uint32_t id, void __user *data)
554 if (!capable(CAP_SYS_ADMIN))
557 nxi = locate_nx_info(id);
560 nx_migrate_task(current, nxi);
/* vserver syscall: add an IP address to network context 'id'.
 * NOTE(review): the actual add is still a stub (see comment below);
 * release/return lines are not visible in this chunk. */
565 int vc_net_add(uint32_t id, void __user *data)
568 struct vcmd_net_nx_v0 vc_data;
570 if (!capable(CAP_SYS_ADMIN))
572 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
575 nxi = locate_nx_info(id);
579 // add ip to net context here
/* vserver syscall: remove an IP address from network context 'id'.
 * NOTE(review): the actual removal is still a stub (see comment
 * below); release/return lines are not visible in this chunk. */
584 int vc_net_remove(uint32_t id, void __user *data)
587 struct vcmd_net_nx_v0 vc_data;
589 if (!capable(CAP_SYS_ADMIN))
591 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
594 nxi = locate_nx_info(id);
598 // rem ip from net context here
/* vserver syscall: copy the context's flag word, plus the mask of
 * flags still settable, to userspace.
 * NOTE(review): release/return lines are not visible in this chunk. */
605 int vc_get_nflags(uint32_t id, void __user *data)
608 struct vcmd_net_flags_v0 vc_data;
610 if (!capable(CAP_SYS_ADMIN))
613 nxi = locate_nx_info(id);
617 vc_data.flagword = nxi->nx_flags;
619 /* special STATE flag handling */
/* one-time flags are removed from the settable mask once consumed */
620 vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, IPF_ONE_TIME);
624 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
/* vserver syscall: update the context's flag word from userspace,
 * restricted by the caller-supplied mask and the one-time-flag rule.
 * 'trigger' records which masked flags actually change state — used
 * only by the commented-out IPF_STATE_SETUP hook below.
 * NOTE(review): release/return lines are not visible in this chunk. */
629 int vc_set_nflags(uint32_t id, void __user *data)
632 struct vcmd_net_flags_v0 vc_data;
633 uint64_t mask, trigger;
635 if (!capable(CAP_SYS_ADMIN))
637 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
640 nxi = locate_nx_info(id);
644 /* special STATE flag handling */
645 mask = vx_mask_mask(vc_data.mask, nxi->nx_flags, IPF_ONE_TIME);
646 trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);
647 // if (trigger & IPF_STATE_SETUP)
649 nxi->nx_flags = vx_mask_flags(nxi->nx_flags,
650 vc_data.flagword, mask);
/* vserver syscall: copy the context's network capability set (and a
 * full-ones capability mask) to userspace.
 * NOTE(review): release/return lines are not visible in this chunk. */
655 int vc_get_ncaps(uint32_t id, void __user *data)
658 struct vcmd_net_caps_v0 vc_data;
660 if (!capable(CAP_SYS_ADMIN))
663 nxi = locate_nx_info(id);
667 vc_data.ncaps = nxi->nx_ncaps;
668 vc_data.cmask = ~0UL;
671 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
/* vserver syscall: update the context's network capabilities from
 * userspace, restricted by the caller-supplied capability mask.
 * NOTE(review): release/return lines are not visible in this chunk. */
676 int vc_set_ncaps(uint32_t id, void __user *data)
679 struct vcmd_net_caps_v0 vc_data;
681 if (!capable(CAP_SYS_ADMIN))
683 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
686 nxi = locate_nx_info(id);
690 nxi->nx_ncaps = vx_mask_flags(nxi->nx_ncaps,
691 vc_data.ncaps, vc_data.cmask);
697 #include <linux/module.h>
/* symbols exported for modules that manage network context lifetime */
699 EXPORT_SYMBOL_GPL(rcu_free_nx_info);
700 EXPORT_SYMBOL_GPL(nx_info_hash_lock);
701 EXPORT_SYMBOL_GPL(unhash_nx_info);