2 * linux/kernel/vserver/network.c
4 * Virtual Server: Network Support
6 * Copyright (C) 2003-2005 Herbert Pötzl
8 * V0.01 broken out from vcontext V0.05
9 * V0.02 cleaned up implementation
10 * V0.03 added equiv nx commands
11 * V0.04 switch to RCU based hash
15 #include <linux/config.h>
16 #include <linux/slab.h>
17 #include <linux/vserver/network_cmd.h>
18 #include <linux/rcupdate.h>
21 #include <asm/errno.h>
/* __alloc_nx_info()
 * NOTE(review): kmalloc()s and zeroes a fresh nx_info for `nid`, then
 * initializes its RCU head, hash-list node and ref/use counters to 0.
 * The struct is deliberately NOT inserted into the hash here (see the
 * original comment below).  The NULL-check after kmalloc and the final
 * return appear elided from this listing - confirm in the full source. */
26 * allocate an initialized nx_info struct
27 * doesn't make it visible (hash) */
29 static struct nx_info *__alloc_nx_info(nid_t nid)
31 struct nx_info *new = NULL;
33 vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);
35 /* would this benefit from a slab cache? */
36 new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
40 memset (new, 0, sizeof(struct nx_info));
42 INIT_RCU_HEAD(&new->nx_rcu);
43 INIT_HLIST_NODE(&new->nx_hlist);
44 atomic_set(&new->nx_refcnt, 0);
45 atomic_set(&new->nx_usecnt, 0);
47 /* rest of init goes here */
49 vxdprintk(VXD_CBIT(nid, 0),
50 "alloc_nx_info() = %p", new);
54 /* __dealloc_nx_info()
56 * final disposal of nx_info */
/* NOTE(review): poisons the hash-list forward link and BUG()s if any
 * use/ref counts are still held; the actual kfree() is elided from this
 * listing - confirm against the full source. */
58 static void __dealloc_nx_info(struct nx_info *nxi)
60 vxdprintk(VXD_CBIT(nid, 0),
61 "dealloc_nx_info(%p)", nxi);
/* poison the link so a stale hlist traversal faults loudly */
63 nxi->nx_hlist.next = LIST_POISON1;
/* both counters must have dropped to zero before disposal */
66 BUG_ON(atomic_read(&nxi->nx_usecnt));
67 BUG_ON(atomic_read(&nxi->nx_refcnt));
/* __free_nx_info()
 * NOTE(review): samples usecnt/refcnt and then calls __dealloc_nx_info();
 * the checks between the two reads and the dealloc, plus the int return
 * value computation, are elided from this listing - verify there. */
72 static inline int __free_nx_info(struct nx_info *nxi)
78 usecnt = atomic_read(&nxi->nx_usecnt);
81 refcnt = atomic_read(&nxi->nx_refcnt);
85 __dealloc_nx_info(nxi);
/* free_nx_info()
 * Public disposal entry point (exported below); asserts that
 * __free_nx_info() succeeds.  The state BUG_ON()s are intentionally left
 * commented out by the original author. */
91 void free_nx_info(struct nx_info *nxi)
93 /* context shutdown is mandatory */
94 // BUG_ON(nxi->nx_state != NXS_SHUTDOWN);
96 // BUG_ON(nxi->nx_state & NXS_HASHED);
98 BUG_ON(__free_nx_info(nxi));
102 /* hash table for nx_info hash */
104 #define NX_HASH_SIZE 13
/* 13 buckets, guarded by nx_info_hash_lock below */
106 struct hlist_head nx_info_hash[NX_HASH_SIZE];
108 static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED;
/* trivial modulo hash: maps a nid onto one of the NX_HASH_SIZE buckets */
111 static inline unsigned int __hashval(nid_t nid)
113 return (nid % NX_HASH_SIZE);
/* __hash_nx_info()
 * NOTE(review): inserts nxi at the head of its bucket; per the comment
 * the caller must hold nx_info_hash_lock, though no vxd_assert_lock()
 * is visible here (unlike __unhash_nx_info) - confirm in full source. */
120 * add the nxi to the global hash table
121 * requires the hash_lock to be held */
123 static inline void __hash_nx_info(struct nx_info *nxi)
125 struct hlist_head *head;
127 vxdprintk(VXD_CBIT(nid, 4),
128 "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);
/* bucket selected by the nid modulo hash */
130 head = &nx_info_hash[__hashval(nxi->nx_id)];
131 hlist_add_head(&nxi->nx_hlist, head);
134 /* __unhash_nx_info()
136 * remove the nxi from the global hash table
137 * requires the hash_lock to be held */
138 static inline void __unhash_nx_info(struct nx_info *nxi)
149 /* __lookup_nx_info()
151 * requires the hash_lock to be held
152 * doesn't increment the nx_refcnt */
/* NOTE(review): linear scan of one bucket for a matching nx_id; the
 * found/not-found return statements are elided from this listing. */
154 static inline struct nx_info *__lookup_nx_info(nid_t nid)
156 struct hlist_head *head = &nx_info_hash[__hashval(nid)];
157 struct hlist_node *pos;
159 vxd_assert_lock(&nx_info_hash_lock);
160 hlist_for_each(pos, head) {
161 struct nx_info *nxi =
162 hlist_entry(pos, struct nx_info, nx_hlist);
164 if (nxi->nx_id == nid) {
/* __nx_dynamic_id()
 * NOTE(review): searches for a free dynamic nid using a static cursor
 * `seq` that wraps past MAX_N_CONTEXT; the loop runs until it returns
 * to `barrier` (declared in elided lines), i.e. one full wrap means
 * no free id.  Safe only because the hash lock serializes callers. */
174 * find unused dynamic nid
175 * requires the hash_lock to be held */
177 static inline nid_t __nx_dynamic_id(void)
/* cursor persists across calls so successive ids are spread out */
179 static nid_t seq = MAX_N_CONTEXT;
182 vxd_assert_lock(&nx_info_hash_lock);
184 if (++seq > MAX_N_CONTEXT)
186 if (!__lookup_nx_info(seq)) {
187 vxdprintk(VXD_CBIT(nid, 4),
188 "__nx_dynamic_id: [#%d]", seq);
191 } while (barrier != seq);
/* __loc_nx_info()
 * NOTE(review): allocate-then-lock pattern: a candidate nx_info is
 * allocated BEFORE taking the hash lock (kmalloc may sleep), then under
 * the lock the id is either resolved dynamically, found already hashed,
 * or the candidate is hashed as new.  If the candidate was not used it
 * is deallocated after the unlock.  Several branch bodies and the error
 * paths (*err assignment) are elided from this listing. */
197 * locate or create the requested context
198 * get() it and if new hash it */
200 static struct nx_info * __loc_nx_info(int id, int *err)
202 struct nx_info *new, *nxi = NULL;
204 vxdprintk(VXD_CBIT(nid, 1), "loc_nx_info(%d)*", id);
206 if (!(new = __alloc_nx_info(id))) {
211 /* required to make dynamic xids unique */
212 spin_lock(&nx_info_hash_lock);
214 /* dynamic context requested */
215 if (id == NX_DYNAMIC_ID) {
216 id = __nx_dynamic_id();
218 printk(KERN_ERR "no dynamic context available.\n");
223 /* existing context requested */
224 else if ((nxi = __lookup_nx_info(id))) {
225 /* context in setup is not available */
226 if (nxi->nx_flags & VXF_STATE_SETUP) {
227 vxdprintk(VXD_CBIT(nid, 0),
228 "loc_nx_info(%d) = %p (not available)", id, nxi);
232 vxdprintk(VXD_CBIT(nid, 0),
233 "loc_nx_info(%d) = %p (found)", id, nxi);
240 /* new context requested */
241 vxdprintk(VXD_CBIT(nid, 0),
242 "loc_nx_info(%d) = %p (new)", id, new);
/* hash the candidate with a reference already taken */
243 __hash_nx_info(get_nx_info(new));
244 nxi = new, new = NULL;
248 spin_unlock(&nx_info_hash_lock);
/* unused candidate (found/error paths) is disposed outside the lock */
250 __dealloc_nx_info(new);
/* unhash_nx_info()
 * Locked wrapper around __unhash_nx_info() (exported below). */
259 void unhash_nx_info(struct nx_info *nxi)
261 spin_lock(&nx_info_hash_lock);
262 __unhash_nx_info(nxi);
263 spin_unlock(&nx_info_hash_lock);
/* locate_nx_info()
 * NOTE(review): returns a get()ed nx_info for `id`, or the current
 * task's nx_info when id is negative (per the comment); the id<0 branch
 * structure is partly elided from this listing.  get_nx_info(NULL) is
 * presumably NULL-safe for the lookup-miss case - confirm. */
268 * search for a nx_info and get() it
269 * negative id means current */
271 struct nx_info *locate_nx_info(int id)
276 nxi = get_nx_info(current->nx_info);
278 spin_lock(&nx_info_hash_lock);
279 nxi = get_nx_info(__lookup_nx_info(id));
280 spin_unlock(&nx_info_hash_lock);
/* nid_is_hashed()
 * Locked existence check: nonzero iff `nid` is currently in the hash. */
287 * verify that nid is still hashed */
289 int nid_is_hashed(nid_t nid)
293 spin_lock(&nx_info_hash_lock);
294 hashed = (__lookup_nx_info(nid) != NULL);
295 spin_unlock(&nx_info_hash_lock);
299 #ifdef CONFIG_VSERVER_LEGACYNET
/* locate_or_create_nx_info()
 * Legacy (CONFIG_VSERVER_LEGACYNET) wrapper: delegates to __loc_nx_info;
 * the local `err` declaration is elided from this listing. */
301 struct nx_info *locate_or_create_nx_info(int id)
305 return __loc_nx_info(id, &err);
/* create_nx_info()
 * Legacy helper: creates a context with a dynamically assigned nid via
 * __loc_nx_info(NX_DYNAMIC_ID, ...); the failure return is elided here. */
308 struct nx_info *create_nx_info(void)
313 vxdprintk(VXD_CBIT(nid, 5), "create_nx_info(%s)", "void");
314 if (!(new = __loc_nx_info(NX_DYNAMIC_ID, &err)))
322 #ifdef CONFIG_PROC_FS
/* get_nid_list()
 * NOTE(review): fills `nids` (capacity `size`) with the nx_ids currently
 * hashed, for /proc; the lock is taken/released per bucket to keep hold
 * times short.  The `index` skip logic and the early-exit return after
 * the in-loop unlock are elided from this listing. */
324 int get_nid_list(int index, unsigned int *nids, int size)
326 int hindex, nr_nids = 0;
328 for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
329 struct hlist_head *head = &nx_info_hash[hindex];
330 struct hlist_node *pos;
332 spin_lock(&nx_info_hash_lock);
333 hlist_for_each(pos, head) {
339 nxi = hlist_entry(pos, struct nx_info, nx_hlist);
340 nids[nr_nids] = nxi->nx_id;
341 if (++nr_nids >= size) {
/* capacity reached: drop the lock before bailing out */
342 spin_unlock(&nx_info_hash_lock);
346 /* keep the lock time short */
347 spin_unlock(&nx_info_hash_lock);
/* nx_migrate_task()
 * NOTE(review): swaps task p's network context to nxi via the
 * clr_nx_info()/set_nx_info() pair, balancing the reference taken by
 * task_get_nx_info() with the final put_nx_info().  The guard checks
 * between the get and the swap (old vs new comparison, locking) are
 * elided from this listing. */
356 * migrate task to new network
359 int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
361 struct nx_info *old_nxi;
367 vxdprintk(VXD_CBIT(nid, 5),
368 "nx_migrate_task(%p,%p[#%d.%d.%d])",
370 atomic_read(&nxi->nx_usecnt),
371 atomic_read(&nxi->nx_refcnt));
/* hold a reference on the outgoing context while we swap */
373 old_nxi = task_get_nx_info(p);
378 /* should be handled in set_nx_info !! */
380 clr_nx_info(&p->nx_info);
381 set_nx_info(&p->nx_info, nxi);
385 /* obsoleted by clr/set */
386 // put_nx_info(old_nxi);
/* drop the reference taken by task_get_nx_info() above */
388 put_nx_info(old_nxi);
393 #include <linux/netdevice.h>
394 #include <linux/inetdevice.h>
/* ifa_in_nx_info()
 * True iff the interface address belongs to the context's address set;
 * any NULL/shortcut checks between signature and return are elided here. */
397 int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
403 return addr_in_nx_info(nxi, ifa->ifa_address);
/* dev_in_nx_info()
 * NOTE(review): walks the device's in_device address list and reports
 * whether any address matches the context; the NULL in_dev guard and
 * both return statements are elided from this listing. */
406 int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
408 struct in_device *in_dev = __in_dev_get(dev);
409 struct in_ifaddr **ifap = NULL;
410 struct in_ifaddr *ifa = NULL;
417 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
418 ifap = &ifa->ifa_next) {
419 if (addr_in_nx_info(nxi, ifa->ifa_address))
426 * check if address is covered by socket
428 * sk: the socket to check against
429 * addr: the address in question (must be != 0)
/* NOTE(review): three-way decision visible below - bound socket matches
 * by exact saddr, unbound socket falls back to the socket's nx_info set,
 * and (per the trailing comment) a socket without nx_info matches any
 * address; the conditionals selecting the branches are elided here. */
431 static inline int __addr_in_socket(struct sock *sk, uint32_t addr)
433 struct nx_info *nxi = sk->sk_nx_info;
434 uint32_t saddr = tcp_v4_rcv_saddr(sk);
436 vxdprintk(VXD_CBIT(net, 5),
437 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx",
438 sk, VXD_QUAD(addr), nxi, VXD_QUAD(saddr), sk->sk_socket,
439 (sk->sk_socket?sk->sk_socket->flags:0));
442 /* direct address match */
443 return (saddr == addr);
445 /* match against nx_info */
446 return addr_in_nx_info(nxi, addr);
448 /* unrestricted any socket */
/* nx_addr_conflict()
 * NOTE(review): checks whether `addr` (or, when addr is the wildcard,
 * any of the nxi's ipv4 addresses) conflicts with socket sk via
 * __addr_in_socket(); the branch conditions, the loop header over
 * 0..n-1 and the return statements are elided from this listing. */
454 int nx_addr_conflict(struct nx_info *nxi, uint32_t addr, struct sock *sk)
456 vxdprintk(VXD_CBIT(net, 2),
457 "nx_addr_conflict(%p,%p) %d.%d,%d.%d",
458 nxi, sk, VXD_QUAD(addr));
461 /* check real address */
462 return __addr_in_socket(sk, addr);
464 /* check against nx_info */
465 int i, n = nxi->nbipv4;
468 if (__addr_in_socket(sk, nxi->ipv4[i]))
472 /* check against any */
478 /* vserver syscall commands below here */
480 /* task nid and nx_info functions */
482 #include <asm/uaccess.h>
/* vc_task_nid()
 * vserver syscall: return the nid of the task with real pid `id`,
 * or -ESRCH if no such task; restricted to admin/watch contexts.
 * The final return of `nid` is elided from this listing. */
485 int vc_task_nid(uint32_t id, void __user *data)
490 struct task_struct *tsk;
492 if (!vx_check(0, VX_ADMIN|VX_WATCH))
/* tasklist_lock keeps the task from disappearing during the lookup */
495 read_lock(&tasklist_lock);
496 tsk = find_task_by_real_pid(id);
497 nid = (tsk) ? tsk->nid : -ESRCH;
498 read_unlock(&tasklist_lock);
/* vc_nx_info()
 * vserver syscall: copy a vcmd_nx_info_v0 describing context `id` to
 * userspace; requires admin context plus both CAP_SYS_ADMIN and
 * CAP_SYS_RESOURCE.  The put_nx_info() balancing locate_nx_info() and
 * the error returns are elided from this listing. */
506 int vc_nx_info(uint32_t id, void __user *data)
509 struct vcmd_nx_info_v0 vc_data;
511 if (!vx_check(0, VX_ADMIN))
513 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
516 nxi = locate_nx_info(id);
520 vc_data.nid = nxi->nx_id;
523 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
529 /* network functions */
/* vc_net_create()
 * vserver syscall: create (or locate) network context `nid`, migrate the
 * calling task into it, and return the new nx_id.
 * NOTE(review): the range check below compares against VX_DYNAMIC_ID
 * while this file otherwise uses NX_DYNAMIC_ID for the net subsystem -
 * verify against the headers whether the two constants coincide. */
531 int vc_net_create(uint32_t nid, void __user *data)
533 // int ret = -ENOMEM;
534 struct nx_info *new_nxi;
537 if (!capable(CAP_SYS_ADMIN))
540 if ((nid >= MIN_D_CONTEXT) && (nid != VX_DYNAMIC_ID))
546 new_nxi = __loc_nx_info(nid, &ret);
/* an already-set-up context must not be "created" again */
549 if (!(new_nxi->nx_flags & VXF_STATE_SETUP)) {
554 ret = new_nxi->nx_id;
555 nx_migrate_task(current, new_nxi);
/* drop the reference handed back by __loc_nx_info() */
557 put_nx_info(new_nxi);
/* vc_net_migrate()
 * vserver syscall: move the calling task into existing context `id`;
 * the not-found error path and the put_nx_info() release are elided
 * from this listing. */
562 int vc_net_migrate(uint32_t id, void __user *data)
566 if (!capable(CAP_SYS_ADMIN))
569 nxi = locate_nx_info(id);
572 nx_migrate_task(current, nxi);
/* vc_net_add()
 * vserver syscall: add an IP (from the user-supplied vcmd_net_nx_v0) to
 * network context `id`.  The actual add is still a TODO in the original
 * (see comment below); error paths and put_nx_info() are elided here. */
577 int vc_net_add(uint32_t id, void __user *data)
580 struct vcmd_net_nx_v0 vc_data;
582 if (!capable(CAP_SYS_ADMIN))
584 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
587 nxi = locate_nx_info(id);
591 // add ip to net context here
/* vc_net_remove()
 * vserver syscall: remove an IP (from the user-supplied vcmd_net_nx_v0)
 * from network context `id`.  Like vc_net_add(), the removal itself is
 * still a TODO in the original; error paths are elided from this view. */
596 int vc_net_remove(uint32_t id, void __user *data)
599 struct vcmd_net_nx_v0 vc_data;
601 if (!capable(CAP_SYS_ADMIN))
603 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
606 nxi = locate_nx_info(id);
610 // rem ip from net context here
/* vc_get_nflags()
 * vserver syscall: copy context `id`'s flag word and the applicable
 * mask to userspace; one-time (IPF_ONE_TIME) flags are masked out of
 * the reported mask once consumed.  put_nx_info() and the returns are
 * elided from this listing. */
617 int vc_get_nflags(uint32_t id, void __user *data)
620 struct vcmd_net_flags_v0 vc_data;
622 if (!capable(CAP_SYS_ADMIN))
625 nxi = locate_nx_info(id);
629 vc_data.flagword = nxi->nx_flags;
631 /* special STATE flag handling */
632 vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, IPF_ONE_TIME);
636 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
/* vc_set_nflags()
 * vserver syscall: update context `id`'s flag word from the
 * user-supplied vcmd_net_flags_v0, restricted by the one-time mask.
 * `trigger` computes which masked bits actually change; the code that
 * would act on IPF_STATE_SETUP transitions is commented out.  Error
 * paths and put_nx_info() are elided from this listing. */
641 int vc_set_nflags(uint32_t id, void __user *data)
644 struct vcmd_net_flags_v0 vc_data;
645 uint64_t mask, trigger;
647 if (!capable(CAP_SYS_ADMIN))
649 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
652 nxi = locate_nx_info(id);
656 /* special STATE flag handling */
657 mask = vx_mask_mask(vc_data.mask, nxi->nx_flags, IPF_ONE_TIME);
/* bits that actually flip under the effective mask */
658 trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);
659 // if (trigger & IPF_STATE_SETUP)
661 nxi->nx_flags = vx_mask_flags(nxi->nx_flags,
662 vc_data.flagword, mask);
/* vc_get_ncaps()
 * vserver syscall: report context `id`'s network capability set (with a
 * full-ones change mask) to userspace; put_nx_info() and the returns
 * are elided from this listing. */
667 int vc_get_ncaps(uint32_t id, void __user *data)
670 struct vcmd_net_caps_v0 vc_data;
672 if (!capable(CAP_SYS_ADMIN))
675 nxi = locate_nx_info(id);
679 vc_data.ncaps = nxi->nx_ncaps;
680 vc_data.cmask = ~0UL;
683 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
/* vc_set_ncaps()
 * vserver syscall: update context `id`'s network capabilities from the
 * user-supplied vcmd_net_caps_v0, changing only the bits selected by
 * cmask; error paths and put_nx_info() are elided from this listing. */
688 int vc_set_ncaps(uint32_t id, void __user *data)
691 struct vcmd_net_caps_v0 vc_data;
693 if (!capable(CAP_SYS_ADMIN))
695 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
698 nxi = locate_nx_info(id);
702 nxi->nx_ncaps = vx_mask_flags(nxi->nx_ncaps,
703 vc_data.ncaps, vc_data.cmask);
709 #include <linux/module.h>
711 EXPORT_SYMBOL_GPL(free_nx_info);
712 EXPORT_SYMBOL_GPL(unhash_nx_info);