2 * linux/kernel/vserver/network.c
4 * Virtual Server: Network Support
 * Copyright (C) 2003-2007 Herbert Pötzl
8 * V0.01 broken out from vcontext V0.05
9 * V0.02 cleaned up implementation
10 * V0.03 added equiv nx commands
11 * V0.04 switch to RCU based hash
12 * V0.05 and back to locking again
13 * V0.06 changed vcmds to nxi arg
14 * V0.07 have __create claim() the nxi
18 #include <linux/slab.h>
19 #include <linux/rcupdate.h>
21 #include <linux/vserver/network_cmd.h>
23 #include <asm/errno.h>
24 #include <linux/vserver/base.h>
25 #include <linux/vserver/network_cmd.h>
28 atomic_t nx_global_ctotal = ATOMIC_INIT(0);
29 atomic_t nx_global_cactive = ATOMIC_INIT(0);
34 * allocate an initialized nx_info struct
35 * doesn't make it visible (hash) */
37 static struct nx_info *__alloc_nx_info(nid_t nid)
39 struct nx_info *new = NULL;
41 vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);
43 /* would this benefit from a slab cache? */
44 new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
48 memset (new, 0, sizeof(struct nx_info));
50 INIT_HLIST_NODE(&new->nx_hlist);
51 atomic_set(&new->nx_usecnt, 0);
52 atomic_set(&new->nx_tasks, 0);
55 new->nx_flags = NXF_INIT_SET;
57 /* rest of init goes here */
59 vxdprintk(VXD_CBIT(nid, 0),
60 "alloc_nx_info(%d) = %p", nid, new);
61 atomic_inc(&nx_global_ctotal);
65 /* __dealloc_nx_info()
67 * final disposal of nx_info */
69 static void __dealloc_nx_info(struct nx_info *nxi)
71 vxdprintk(VXD_CBIT(nid, 0),
72 "dealloc_nx_info(%p)", nxi);
74 nxi->nx_hlist.next = LIST_POISON1;
77 BUG_ON(atomic_read(&nxi->nx_usecnt));
78 BUG_ON(atomic_read(&nxi->nx_tasks));
80 nxi->nx_state |= NXS_RELEASED;
82 atomic_dec(&nx_global_ctotal);
85 static void __shutdown_nx_info(struct nx_info *nxi)
87 nxi->nx_state |= NXS_SHUTDOWN;
88 vs_net_change(nxi, VSC_NETDOWN);
93 void free_nx_info(struct nx_info *nxi)
95 /* context shutdown is mandatory */
96 BUG_ON(nxi->nx_state != NXS_SHUTDOWN);
98 /* context must not be hashed */
99 BUG_ON(nxi->nx_state & NXS_HASHED);
101 BUG_ON(atomic_read(&nxi->nx_usecnt));
102 BUG_ON(atomic_read(&nxi->nx_tasks));
104 __dealloc_nx_info(nxi);
108 /* hash table for nx_info hash */
110 #define NX_HASH_SIZE 13
112 struct hlist_head nx_info_hash[NX_HASH_SIZE];
114 static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED;
117 static inline unsigned int __hashval(nid_t nid)
119 return (nid % NX_HASH_SIZE);
126 * add the nxi to the global hash table
127 * requires the hash_lock to be held */
129 static inline void __hash_nx_info(struct nx_info *nxi)
131 struct hlist_head *head;
133 vxd_assert_lock(&nx_info_hash_lock);
134 vxdprintk(VXD_CBIT(nid, 4),
135 "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);
137 /* context must not be hashed */
138 BUG_ON(nx_info_state(nxi, NXS_HASHED));
140 nxi->nx_state |= NXS_HASHED;
141 head = &nx_info_hash[__hashval(nxi->nx_id)];
142 hlist_add_head(&nxi->nx_hlist, head);
143 atomic_inc(&nx_global_cactive);
146 /* __unhash_nx_info()
148 * remove the nxi from the global hash table
149 * requires the hash_lock to be held */
151 static inline void __unhash_nx_info(struct nx_info *nxi)
153 vxd_assert_lock(&nx_info_hash_lock);
154 vxdprintk(VXD_CBIT(nid, 4),
155 "__unhash_nx_info: %p[#%d.%d.%d]", nxi, nxi->nx_id,
156 atomic_read(&nxi->nx_usecnt), atomic_read(&nxi->nx_tasks));
158 /* context must be hashed */
159 BUG_ON(!nx_info_state(nxi, NXS_HASHED));
160 /* but without tasks */
161 BUG_ON(atomic_read(&nxi->nx_tasks));
163 nxi->nx_state &= ~NXS_HASHED;
164 hlist_del(&nxi->nx_hlist);
165 atomic_dec(&nx_global_cactive);
169 /* __lookup_nx_info()
171 * requires the hash_lock to be held
172 * doesn't increment the nx_refcnt */
174 static inline struct nx_info *__lookup_nx_info(nid_t nid)
176 struct hlist_head *head = &nx_info_hash[__hashval(nid)];
177 struct hlist_node *pos;
180 vxd_assert_lock(&nx_info_hash_lock);
181 hlist_for_each(pos, head) {
182 nxi = hlist_entry(pos, struct nx_info, nx_hlist);
184 if (nxi->nx_id == nid)
189 vxdprintk(VXD_CBIT(nid, 0),
190 "__lookup_nx_info(#%u): %p[#%u]",
191 nid, nxi, nxi?nxi->nx_id:0);
198 * find unused dynamic nid
199 * requires the hash_lock to be held */
201 static inline nid_t __nx_dynamic_id(void)
203 static nid_t seq = MAX_N_CONTEXT;
206 vxd_assert_lock(&nx_info_hash_lock);
208 if (++seq > MAX_N_CONTEXT)
210 if (!__lookup_nx_info(seq)) {
211 vxdprintk(VXD_CBIT(nid, 4),
212 "__nx_dynamic_id: [#%d]", seq);
215 } while (barrier != seq);
219 /* __create_nx_info()
221 * create the requested context
222 * get(), claim() and hash it */
224 static struct nx_info * __create_nx_info(int id)
226 struct nx_info *new, *nxi = NULL;
228 vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id);
230 if (!(new = __alloc_nx_info(id)))
231 return ERR_PTR(-ENOMEM);
233 /* required to make dynamic xids unique */
234 spin_lock(&nx_info_hash_lock);
236 /* dynamic context requested */
237 if (id == NX_DYNAMIC_ID) {
238 #ifdef CONFIG_VSERVER_DYNAMIC_IDS
239 id = __nx_dynamic_id();
241 printk(KERN_ERR "no dynamic context available.\n");
242 nxi = ERR_PTR(-EAGAIN);
247 printk(KERN_ERR "dynamic contexts disabled.\n");
248 nxi = ERR_PTR(-EINVAL);
252 /* static context requested */
253 else if ((nxi = __lookup_nx_info(id))) {
254 vxdprintk(VXD_CBIT(nid, 0),
255 "create_nx_info(%d) = %p (already there)", id, nxi);
256 if (nx_info_flags(nxi, NXF_STATE_SETUP, 0))
257 nxi = ERR_PTR(-EBUSY);
259 nxi = ERR_PTR(-EEXIST);
262 /* dynamic nid creation blocker */
263 else if (id >= MIN_D_CONTEXT) {
264 vxdprintk(VXD_CBIT(nid, 0),
265 "create_nx_info(%d) (dynamic rejected)", id);
266 nxi = ERR_PTR(-EINVAL);
271 vxdprintk(VXD_CBIT(nid, 0),
272 "create_nx_info(%d) = %p (new)", id, new);
273 claim_nx_info(new, NULL);
274 __hash_nx_info(get_nx_info(new));
275 nxi = new, new = NULL;
278 spin_unlock(&nx_info_hash_lock);
280 __dealloc_nx_info(new);
289 void unhash_nx_info(struct nx_info *nxi)
291 __shutdown_nx_info(nxi);
292 spin_lock(&nx_info_hash_lock);
293 __unhash_nx_info(nxi);
294 spin_unlock(&nx_info_hash_lock);
297 #ifdef CONFIG_VSERVER_LEGACYNET
299 struct nx_info *create_nx_info(void)
301 return __create_nx_info(NX_DYNAMIC_ID);
308 * search for a nx_info and get() it
309 * negative id means current */
311 struct nx_info *lookup_nx_info(int id)
313 struct nx_info *nxi = NULL;
316 nxi = get_nx_info(current->nx_info);
318 spin_lock(&nx_info_hash_lock);
319 nxi = get_nx_info(__lookup_nx_info(id));
320 spin_unlock(&nx_info_hash_lock);
327 * verify that nid is still hashed */
329 int nid_is_hashed(nid_t nid)
333 spin_lock(&nx_info_hash_lock);
334 hashed = (__lookup_nx_info(nid) != NULL);
335 spin_unlock(&nx_info_hash_lock);
340 #ifdef CONFIG_PROC_FS
344 * get a subset of hashed nids for proc
345 * assumes size is at least one */
347 int get_nid_list(int index, unsigned int *nids, int size)
349 int hindex, nr_nids = 0;
351 /* only show current and children */
352 if (!nx_check(0, VS_ADMIN|VS_WATCH)) {
355 nids[nr_nids] = nx_current_nid();
359 for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
360 struct hlist_head *head = &nx_info_hash[hindex];
361 struct hlist_node *pos;
363 spin_lock(&nx_info_hash_lock);
364 hlist_for_each(pos, head) {
370 nxi = hlist_entry(pos, struct nx_info, nx_hlist);
371 nids[nr_nids] = nxi->nx_id;
372 if (++nr_nids >= size) {
373 spin_unlock(&nx_info_hash_lock);
377 /* keep the lock time short */
378 spin_unlock(&nx_info_hash_lock);
387 * migrate task to new network
388 * gets nxi, puts old_nxi on change
391 int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
393 struct nx_info *old_nxi;
399 vxdprintk(VXD_CBIT(nid, 5),
400 "nx_migrate_task(%p,%p[#%d.%d.%d])",
402 atomic_read(&nxi->nx_usecnt),
403 atomic_read(&nxi->nx_tasks));
405 if (nx_info_flags(nxi, NXF_INFO_PRIVATE, 0) &&
406 !nx_info_flags(nxi, NXF_STATE_SETUP, 0))
409 if (nx_info_state(nxi, NXS_SHUTDOWN))
412 /* maybe disallow this completely? */
413 old_nxi = task_get_nx_info(p);
419 clr_nx_info(&p->nx_info);
420 claim_nx_info(nxi, p);
421 set_nx_info(&p->nx_info, nxi);
425 vxdprintk(VXD_CBIT(nid, 5),
426 "moved task %p into nxi:%p[#%d]",
430 release_nx_info(old_nxi, p);
433 put_nx_info(old_nxi);
440 #include <linux/netdevice.h>
441 #include <linux/inetdevice.h>
443 int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
449 return addr_in_nx_info(nxi, ifa->ifa_local);
452 int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
454 struct in_device *in_dev;
455 struct in_ifaddr **ifap;
456 struct in_ifaddr *ifa;
464 in_dev = in_dev_get(dev);
468 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
469 ifap = &ifa->ifa_next) {
470 if (addr_in_nx_info(nxi, ifa->ifa_local)) {
481 * check if address is covered by socket
483 * sk: the socket to check against
484 * addr: the address in question (must be != 0)
486 static inline int __addr_in_socket(const struct sock *sk, uint32_t addr)
488 struct nx_info *nxi = sk->sk_nx_info;
489 uint32_t saddr = inet_rcv_saddr(sk);
491 vxdprintk(VXD_CBIT(net, 5),
492 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx",
493 sk, VXD_QUAD(addr), nxi, VXD_QUAD(saddr), sk->sk_socket,
494 (sk->sk_socket?sk->sk_socket->flags:0));
497 /* direct address match */
498 return (saddr == addr);
500 /* match against nx_info */
501 return addr_in_nx_info(nxi, addr);
503 /* unrestricted any socket */
509 int nx_addr_conflict(struct nx_info *nxi, uint32_t addr, const struct sock *sk)
511 vxdprintk(VXD_CBIT(net, 2),
512 "nx_addr_conflict(%p,%p) %d.%d,%d.%d",
513 nxi, sk, VXD_QUAD(addr));
516 /* check real address */
517 return __addr_in_socket(sk, addr);
519 /* check against nx_info */
520 int i, n = nxi->nbipv4;
523 if (__addr_in_socket(sk, nxi->ipv4[i]))
527 /* check against any */
532 #endif /* CONFIG_INET */
534 void nx_set_persistent(struct nx_info *nxi)
536 vxdprintk(VXD_CBIT(nid, 6),
537 "nx_set_persistent(%p[#%d])", nxi, nxi->nx_id);
540 claim_nx_info(nxi, NULL);
543 void nx_clear_persistent(struct nx_info *nxi)
545 vxdprintk(VXD_CBIT(nid, 6),
546 "nx_clear_persistent(%p[#%d])", nxi, nxi->nx_id);
548 release_nx_info(nxi, NULL);
552 void nx_update_persistent(struct nx_info *nxi)
554 if (nx_info_flags(nxi, NXF_PERSISTENT, 0))
555 nx_set_persistent(nxi);
557 nx_clear_persistent(nxi);
560 /* vserver syscall commands below here */
562 /* taks nid and nx_info functions */
564 #include <asm/uaccess.h>
567 int vc_task_nid(uint32_t id, void __user *data)
572 struct task_struct *tsk;
574 if (!nx_check(0, VS_ADMIN|VS_WATCH))
577 read_lock(&tasklist_lock);
578 tsk = find_task_by_real_pid(id);
579 nid = (tsk) ? tsk->nid : -ESRCH;
580 read_unlock(&tasklist_lock);
583 nid = nx_current_nid();
588 int vc_nx_info(struct nx_info *nxi, void __user *data)
590 struct vcmd_nx_info_v0 vc_data;
592 vc_data.nid = nxi->nx_id;
594 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
600 /* network functions */
602 int vc_net_create(uint32_t nid, void __user *data)
604 struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET };
605 struct nx_info *new_nxi;
608 if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
611 if ((nid > MAX_S_CONTEXT) && (nid != NX_DYNAMIC_ID))
616 new_nxi = __create_nx_info(nid);
618 return PTR_ERR(new_nxi);
621 new_nxi->nx_flags = vc_data.flagword;
624 if (vs_net_change(new_nxi, VSC_NETUP))
627 ret = nx_migrate_task(current, new_nxi);
631 /* return context id on success */
632 ret = new_nxi->nx_id;
634 /* get a reference for persistent contexts */
635 if ((vc_data.flagword & NXF_PERSISTENT))
636 nx_set_persistent(new_nxi);
638 release_nx_info(new_nxi, NULL);
639 put_nx_info(new_nxi);
644 int vc_net_migrate(struct nx_info *nxi, void __user *data)
646 return nx_migrate_task(current, nxi);
649 int vc_net_add(struct nx_info *nxi, void __user *data)
651 struct vcmd_net_addr_v0 vc_data;
652 int index, pos, ret = 0;
654 if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
657 switch (vc_data.type) {
659 if ((vc_data.count < 1) || (vc_data.count > 4))
667 switch (vc_data.type) {
670 while ((index < vc_data.count) &&
671 ((pos = nxi->nbipv4) < NB_IPV4ROOT)) {
672 nxi->ipv4[pos] = vc_data.ip[index];
673 nxi->mask[pos] = vc_data.mask[index];
680 case NXA_TYPE_IPV4|NXA_MOD_BCAST:
681 nxi->v4_bcast = vc_data.ip[0];
692 int vc_net_remove(struct nx_info * nxi, void __user *data)
694 struct vcmd_net_addr_v0 vc_data;
696 if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
699 switch (vc_data.type) {
710 int vc_get_nflags(struct nx_info *nxi, void __user *data)
712 struct vcmd_net_flags_v0 vc_data;
714 vc_data.flagword = nxi->nx_flags;
716 /* special STATE flag handling */
717 vc_data.mask = vs_mask_flags(~0UL, nxi->nx_flags, NXF_ONE_TIME);
719 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
724 int vc_set_nflags(struct nx_info *nxi, void __user *data)
726 struct vcmd_net_flags_v0 vc_data;
727 uint64_t mask, trigger;
729 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
732 /* special STATE flag handling */
733 mask = vs_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME);
734 trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);
736 nxi->nx_flags = vs_mask_flags(nxi->nx_flags,
737 vc_data.flagword, mask);
738 if (trigger & NXF_PERSISTENT)
739 nx_update_persistent(nxi);
744 int vc_get_ncaps(struct nx_info *nxi, void __user *data)
746 struct vcmd_net_caps_v0 vc_data;
748 vc_data.ncaps = nxi->nx_ncaps;
749 vc_data.cmask = ~0UL;
751 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
756 int vc_set_ncaps(struct nx_info *nxi, void __user *data)
758 struct vcmd_net_caps_v0 vc_data;
760 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
763 nxi->nx_ncaps = vs_mask_flags(nxi->nx_ncaps,
764 vc_data.ncaps, vc_data.cmask);
769 #include <linux/module.h>
771 EXPORT_SYMBOL_GPL(free_nx_info);
772 EXPORT_SYMBOL_GPL(unhash_nx_info);