2 * linux/kernel/vserver/network.c
4 * Virtual Server: Network Support
6 * Copyright (C) 2003-2005 Herbert Pötzl
8 * V0.01 broken out from vcontext V0.05
9 * V0.02 cleaned up implementation
10 * V0.03 added equiv nx commands
11 * V0.04 switch to RCU based hash
12 * V0.05 and back to locking again
16 #include <linux/slab.h>
17 #include <linux/vserver/network_cmd.h>
18 #include <linux/rcupdate.h>
21 #include <asm/errno.h>
26 * allocate an initialized nx_info struct
27 * doesn't make it visible (hash) */
/* NOTE(review): this listing is incomplete — each line carries its original
 * file line number and several lines are missing (e.g. the presumed NULL
 * check after kmalloc and the final return of 'new'); verify against the
 * full source before changing anything here. */
29 static struct nx_info *__alloc_nx_info(nid_t nid)
31 struct nx_info *new = NULL;
33 vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);
35 /* would this benefit from a slab cache? */
36 new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
/* zero the whole struct before any field-by-field initialization */
40 memset (new, 0, sizeof(struct nx_info));
42 INIT_HLIST_NODE(&new->nx_hlist);
/* fresh context: no users and no attached tasks yet */
43 atomic_set(&new->nx_usecnt, 0);
44 atomic_set(&new->nx_tasks, 0);
47 new->nx_flags = NXF_INIT_SET;
49 /* rest of init goes here */
51 vxdprintk(VXD_CBIT(nid, 0),
52 "alloc_nx_info(%d) = %p", nid, new);
56 /* __dealloc_nx_info()
58 * final disposal of nx_info */
60 static void __dealloc_nx_info(struct nx_info *nxi)
62 vxdprintk(VXD_CBIT(nid, 0),
63 "dealloc_nx_info(%p)", nxi);
/* poison the hash link so a stale lookup faults loudly */
65 nxi->nx_hlist.next = LIST_POISON1;
/* disposal with outstanding references or tasks is a fatal bug */
68 BUG_ON(atomic_read(&nxi->nx_usecnt));
69 BUG_ON(atomic_read(&nxi->nx_tasks));
/* NOTE(review): the actual kfree() of nxi is presumably on a line
 * missing from this listing — confirm against the full source. */
71 nxi->nx_state |= NXS_RELEASED;
/* mark the context as shut down and notify the accounting/event hook */
75 static void __shutdown_nx_info(struct nx_info *nxi)
77 nxi->nx_state |= NXS_SHUTDOWN;
78 vs_net_change(nxi, VSC_NETDOWN);
/* free_nx_info()
 * sanity-check that the context is fully quiesced, then dispose of it.
 * Caller must have shut the context down and unhashed it first. */
83 void free_nx_info(struct nx_info *nxi)
85 /* context shutdown is mandatory */
86 BUG_ON(nxi->nx_state != NXS_SHUTDOWN)
88 /* context must not be hashed */
89 BUG_ON(nxi->nx_state & NXS_HASHED);
/* no remaining references or attached tasks allowed */
91 BUG_ON(atomic_read(&nxi->nx_usecnt));
92 BUG_ON(atomic_read(&nxi->nx_tasks));
94 __dealloc_nx_info(nxi);
98 /* hash table for nx_info hash */
/* small prime bucket count; nid distribution is roughly uniform mod 13 */
100 #define NX_HASH_SIZE 13
102 struct hlist_head nx_info_hash[NX_HASH_SIZE];
/* serializes all hash mutations and lookups; NOTE(review):
 * SPIN_LOCK_UNLOCKED is the legacy static initializer — newer kernels
 * use DEFINE_SPINLOCK(); keep as-is for this kernel generation. */
104 static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED;
/* map a nid to its hash bucket index (simple modulo) */
107 static inline unsigned int __hashval(nid_t nid)
109 return (nid % NX_HASH_SIZE);
/* __hash_nx_info()
116 * add the nxi to the global hash table
117 * requires the hash_lock to be held */
119 static inline void __hash_nx_info(struct nx_info *nxi)
121 struct hlist_head *head;
123 vxd_assert_lock(&nx_info_hash_lock);
124 vxdprintk(VXD_CBIT(nid, 4),
125 "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);
127 /* context must not be hashed */
128 BUG_ON(nx_info_state(nxi, NXS_HASHED));
/* flag first, then insert into the bucket chosen by nx_id */
130 nxi->nx_state |= NXS_HASHED;
131 head = &nx_info_hash[__hashval(nxi->nx_id)];
132 hlist_add_head(&nxi->nx_hlist, head);
135 /* __unhash_nx_info()
137 * remove the nxi from the global hash table
138 * requires the hash_lock to be held */
140 static inline void __unhash_nx_info(struct nx_info *nxi)
142 vxd_assert_lock(&nx_info_hash_lock);
143 vxdprintk(VXD_CBIT(nid, 4),
144 "__unhash_nx_info: %p[#%d]", nxi, nxi->nx_id);
146 /* context must be hashed */
147 BUG_ON(!nx_info_state(nxi, NXS_HASHED));
/* clear the flag before unlinking — mirror image of __hash_nx_info() */
149 nxi->nx_state &= ~NXS_HASHED;
150 hlist_del(&nxi->nx_hlist);
154 /* __lookup_nx_info()
156 * requires the hash_lock to be held
157 * doesn't increment the nx_refcnt */
159 static inline struct nx_info *__lookup_nx_info(nid_t nid)
161 struct hlist_head *head = &nx_info_hash[__hashval(nid)];
162 struct hlist_node *pos;
/* NOTE(review): the declaration/initialization of 'nxi' (and the loop
 * exit / return paths) are on lines missing from this listing. */
165 vxd_assert_lock(&nx_info_hash_lock);
/* linear scan of the single bucket this nid can live in */
166 hlist_for_each(pos, head) {
167 nxi = hlist_entry(pos, struct nx_info, nx_hlist);
169 if (nxi->nx_id == nid)
174 vxdprintk(VXD_CBIT(nid, 0),
175 "__lookup_nx_info(#%u): %p[#%u]",
176 nid, nxi, nxi?nxi->nx_id:0);
/* __nx_dynamic_id()
183 * find unused dynamic nid
184 * requires the hash_lock to be held */
186 static inline nid_t __nx_dynamic_id(void)
/* 'seq' persists across calls so the search resumes after the last
 * id handed out; 'barrier' (declared on a missing line, presumably)
 * detects a full wrap with no free id. */
188 static nid_t seq = MAX_N_CONTEXT;
191 vxd_assert_lock(&nx_info_hash_lock);
/* NOTE(review): the wrap-around branch after this test is missing
 * from the listing — confirm against the full source. */
193 if (++seq > MAX_N_CONTEXT)
195 if (!__lookup_nx_info(seq)) {
196 vxdprintk(VXD_CBIT(nid, 4),
197 "__nx_dynamic_id: [#%d]", seq);
200 } while (barrier != seq);
204 /* __create_nx_info()
206 * create the requested context
207 * get() and hash it */
/* Returns the new (or error-encoded) nx_info. On any failure path the
 * freshly allocated 'new' is disposed of after the lock is dropped. */
209 static struct nx_info * __create_nx_info(int id)
211 struct nx_info *new, *nxi = NULL;
213 vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id);
/* allocate outside the lock; only the hash insertion needs it */
215 if (!(new = __alloc_nx_info(id)))
216 return ERR_PTR(-ENOMEM);
218 /* required to make dynamic xids unique */
219 spin_lock(&nx_info_hash_lock);
221 /* dynamic context requested */
222 if (id == NX_DYNAMIC_ID) {
223 id = __nx_dynamic_id();
/* id exhaustion — NOTE(review): the 'if (!id)' guard for this branch
 * appears to be on a line missing from the listing */
225 printk(KERN_ERR "no dynamic context available.\n");
226 nxi = ERR_PTR(-EAGAIN);
231 /* static context requested */
232 else if ((nxi = __lookup_nx_info(id))) {
233 vxdprintk(VXD_CBIT(nid, 0),
234 "create_nx_info(%d) = %p (already there)", id, nxi);
/* a context still being set up is busy; a finished one is a dup */
235 if (nx_info_flags(nxi, NXF_STATE_SETUP, 0))
236 nxi = ERR_PTR(-EBUSY);
238 nxi = ERR_PTR(-EEXIST);
241 /* dynamic nid creation blocker */
242 else if (id >= MIN_D_CONTEXT) {
243 vxdprintk(VXD_CBIT(nid, 0),
244 "create_nx_info(%d) (dynamic rejected)", id);
245 nxi = ERR_PTR(-EINVAL);
/* success: hash a get()ed reference and hand ownership of 'new' over */
250 vxdprintk(VXD_CBIT(nid, 0),
251 "create_nx_info(%d) = %p (new)", id, new);
252 __hash_nx_info(get_nx_info(new));
253 nxi = new, new = NULL;
256 spin_unlock(&nx_info_hash_lock);
/* on error paths 'new' is still owned here and must be freed */
258 __dealloc_nx_info(new);
/* unhash_nx_info()
 * shut the context down, then remove it from the global hash
 * under the hash lock. Exported (see EXPORT_SYMBOL_GPL below). */
267 void unhash_nx_info(struct nx_info *nxi)
269 __shutdown_nx_info(nxi);
270 spin_lock(&nx_info_hash_lock);
271 __unhash_nx_info(nxi);
272 spin_unlock(&nx_info_hash_lock);
275 #ifdef CONFIG_VSERVER_LEGACYNET
/* legacy-API wrapper: always creates a dynamically numbered context */
277 struct nx_info *create_nx_info(void)
279 return __create_nx_info(NX_DYNAMIC_ID);
/* lookup_nx_info()
286 * search for a nx_info and get() it
287 * negative id means current */
289 struct nx_info *lookup_nx_info(int id)
291 struct nx_info *nxi = NULL;
/* NOTE(review): the 'if (id < 0) ... else' selecting between these two
 * branches is on lines missing from this listing */
294 nxi = get_nx_info(current->nx_info);
296 spin_lock(&nx_info_hash_lock);
297 nxi = get_nx_info(__lookup_nx_info(id));
298 spin_unlock(&nx_info_hash_lock);
/* nid_is_hashed()
305 * verify that nid is still hashed */
307 int nid_is_hashed(nid_t nid)
/* locked lookup; no reference is taken, only existence is reported */
311 spin_lock(&nx_info_hash_lock);
312 hashed = (__lookup_nx_info(nid) != NULL);
313 spin_unlock(&nx_info_hash_lock);
318 #ifdef CONFIG_PROC_FS
/* get_nid_list()
 * /proc helper: copy up to 'size' hashed nids into 'nids', starting
 * logically at 'index'. Returns the number of entries written
 * (presumably — the return is on a line missing from this listing). */
320 int get_nid_list(int index, unsigned int *nids, int size)
322 int hindex, nr_nids = 0;
324 for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
325 struct hlist_head *head = &nx_info_hash[hindex];
326 struct hlist_node *pos;
/* lock is taken per bucket, not across the whole scan */
328 spin_lock(&nx_info_hash_lock);
329 hlist_for_each(pos, head) {
335 nxi = hlist_entry(pos, struct nx_info, nx_hlist);
336 nids[nr_nids] = nxi->nx_id;
/* output array full: bail out with the lock released */
337 if (++nr_nids >= size) {
338 spin_unlock(&nx_info_hash_lock);
342 /* keep the lock time short */
343 spin_unlock(&nx_info_hash_lock);
/* nx_migrate_task()
352 * migrate task to new network
353 * gets nxi, puts old_nxi on change
 * NOTE(review): the task locking around the pointer swap and the early
 * "same context" short-circuit appear to be on lines missing from
 * this listing — verify against the full source. */
356 int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
358 struct nx_info *old_nxi;
364 vxdprintk(VXD_CBIT(nid, 5),
365 "nx_migrate_task(%p,%p[#%d.%d.%d])",
367 atomic_read(&nxi->nx_usecnt),
368 atomic_read(&nxi->nx_tasks));
370 /* maybe disallow this completely? */
371 old_nxi = task_get_nx_info(p);
/* swap the task's context pointer: drop old, claim and set new */
377 clr_nx_info(&p->nx_info);
378 claim_nx_info(nxi, p);
379 set_nx_info(&p->nx_info, nxi);
383 vxdprintk(VXD_CBIT(nid, 5),
384 "moved task %p into nxi:%p[#%d]",
/* release the task's claim on the old context and drop our temp ref */
388 release_nx_info(old_nxi, p);
390 put_nx_info(old_nxi);
397 #include <linux/netdevice.h>
398 #include <linux/inetdevice.h>
/* check whether an interface address belongs to the context's IPv4 set */
400 int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
406 return addr_in_nx_info(nxi, ifa->ifa_local);
/* dev_in_nx_info()
 * check whether any IPv4 address configured on 'dev' is visible in the
 * context 'nxi'. NOTE(review): the in_dev NULL check, the success/
 * failure returns and the in_dev_put() are on lines missing here. */
409 int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
411 struct in_device *in_dev;
412 struct in_ifaddr **ifap;
413 struct in_ifaddr *ifa;
419 in_dev = in_dev_get(dev);
/* walk the device's address list looking for a match */
423 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
424 ifap = &ifa->ifa_next) {
425 if (addr_in_nx_info(nxi, ifa->ifa_local)) {
/* __addr_in_socket()
436 * check if address is covered by socket
438 * sk: the socket to check against
439 * addr: the address in question (must be != 0)
 * NOTE(review): the conditions selecting between the three returns
 * below (bound address vs. nx_info match vs. unrestricted) are on
 * lines missing from this listing. */
441 static inline int __addr_in_socket(struct sock *sk, uint32_t addr)
443 struct nx_info *nxi = sk->sk_nx_info;
444 uint32_t saddr = inet_rcv_saddr(sk);
446 vxdprintk(VXD_CBIT(net, 5),
447 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx",
448 sk, VXD_QUAD(addr), nxi, VXD_QUAD(saddr), sk->sk_socket,
449 (sk->sk_socket?sk->sk_socket->flags:0));
452 /* direct address match */
453 return (saddr == addr);
455 /* match against nx_info */
456 return addr_in_nx_info(nxi, addr);
458 /* unrestricted any socket */
/* nx_addr_conflict()
 * decide whether 'addr' (or, for a wildcard, any address of 'nxi')
 * conflicts with socket 'sk'. NOTE(review): the branch conditions and
 * returns between the cases below are on lines missing here. */
464 int nx_addr_conflict(struct nx_info *nxi, uint32_t addr, struct sock *sk)
466 vxdprintk(VXD_CBIT(net, 2),
467 "nx_addr_conflict(%p,%p) %d.%d,%d.%d",
468 nxi, sk, VXD_QUAD(addr));
471 /* check real address */
472 return __addr_in_socket(sk, addr);
474 /* check against nx_info */
/* wildcard: test every address assigned to the context */
475 int i, n = nxi->nbipv4;
478 if (__addr_in_socket(sk, nxi->ipv4[i]))
482 /* check against any */
487 #endif /* CONFIG_INET */
/* pin a persistent context by adding a claim on behalf of current */
489 void nx_set_persistent(struct nx_info *nxi)
492 claim_nx_info(nxi, current);
/* drop the persistence claim taken by nx_set_persistent() */
495 void nx_clear_persistent(struct nx_info *nxi)
497 vxdprintk(VXD_CBIT(nid, 6),
498 "nx_clear_persistent(%p[#%d])", nxi, nxi->nx_id);
500 release_nx_info(nxi, current);
/* re-sync the persistence claim with the NXF_PERSISTENT flag state */
504 void nx_update_persistent(struct nx_info *nxi)
506 if (nx_info_flags(nxi, NXF_PERSISTENT, 0))
507 nx_set_persistent(nxi);
509 nx_clear_persistent(nxi);
512 /* vserver syscall commands below here */
514 /* task nid and nx_info functions */
516 #include <asm/uaccess.h>
/* vc_task_nid()
 * syscall handler: return the nid of task 'id', or of the caller when
 * id is 0 (the selecting branch is on a line missing from this listing).
 * Requires admin/watch context; -ESRCH when the task does not exist. */
519 int vc_task_nid(uint32_t id, void __user *data)
524 struct task_struct *tsk;
526 if (!vx_check(0, VX_ADMIN|VX_WATCH))
/* task lookup must happen under the tasklist lock */
529 read_lock(&tasklist_lock);
530 tsk = find_task_by_real_pid(id);
531 nid = (tsk) ? tsk->nid : -ESRCH;
532 read_unlock(&tasklist_lock);
535 nid = nx_current_nid();
/* vc_nx_info()
 * syscall handler: copy the vcmd_nx_info_v0 for context 'id' to
 * userspace. Needs VX_ADMIN plus both CAP_SYS_ADMIN and
 * CAP_SYS_RESOURCE. The put_nx_info() and the error/success returns
 * are on lines missing from this listing. */
540 int vc_nx_info(uint32_t id, void __user *data)
543 struct vcmd_nx_info_v0 vc_data;
545 if (!vx_check(0, VX_ADMIN))
547 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
550 nxi = lookup_nx_info(id);
554 vc_data.nid = nxi->nx_id;
557 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
563 /* network functions */
565 int vc_net_create(uint32_t nid, void __user *data)
567 struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET };
568 struct nx_info *new_nxi;
571 if (!capable(CAP_SYS_ADMIN))
573 if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
576 if ((nid > MAX_S_CONTEXT) && (nid != VX_DYNAMIC_ID))
581 new_nxi = __create_nx_info(nid);
583 return PTR_ERR(new_nxi);
586 new_nxi->nx_flags = vc_data.flagword;
588 /* get a reference for persistent contexts */
589 if ((vc_data.flagword & NXF_PERSISTENT))
590 nx_set_persistent(new_nxi);
593 if (vs_net_change(new_nxi, VSC_NETUP))
595 ret = nx_migrate_task(current, new_nxi);
597 /* return context id on success */
598 ret = new_nxi->nx_id;
602 /* prepare for context disposal */
603 new_nxi->nx_state |= NXS_SHUTDOWN;
604 if ((vc_data.flagword & NXF_PERSISTENT))
605 nx_clear_persistent(new_nxi);
606 __unhash_nx_info(new_nxi);
608 put_nx_info(new_nxi);
/* vc_net_migrate()
 * syscall handler: move the calling task into context 'id'.
 * The put_nx_info() after migration is on a line missing here. */
613 int vc_net_migrate(uint32_t id, void __user *data)
617 if (!capable(CAP_SYS_ADMIN))
620 nxi = lookup_nx_info(id);
623 nx_migrate_task(current, nxi);
/* vc_net_add()
 * syscall handler: add one or more addresses to context 'nid'.
 * Type is validated before the context lookup; the IPv4 case appends
 * up to vc_data.count address/mask pairs, bounded by NB_IPV4ROOT. */
628 int vc_net_add(uint32_t nid, void __user *data)
630 struct vcmd_net_addr_v0 vc_data;
632 int index, pos, ret = 0;
634 if (!capable(CAP_SYS_ADMIN))
636 if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
/* first switch: sanity-check the request before touching the context */
639 switch (vc_data.type) {
641 if ((vc_data.count < 1) || (vc_data.count > 4))
649 nxi = lookup_nx_info(nid);
/* second switch: apply the validated request to the context */
653 switch (vc_data.type) {
656 while ((index < vc_data.count) &&
657 ((pos = nxi->nbipv4) < NB_IPV4ROOT)) {
658 nxi->ipv4[pos] = vc_data.ip[index];
659 nxi->mask[pos] = vc_data.mask[index];
/* broadcast modifier: single address, stored separately */
666 case NXA_TYPE_IPV4|NXA_MOD_BCAST:
667 nxi->v4_bcast = vc_data.ip[0];
/* vc_net_remove()
 * syscall handler: remove addresses from context 'nid'. The switch
 * cases, put_nx_info() and returns are on lines missing here. */
680 int vc_net_remove(uint32_t nid, void __user *data)
682 struct vcmd_net_addr_v0 vc_data;
686 if (!capable(CAP_SYS_ADMIN))
688 if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
691 nxi = lookup_nx_info(nid);
695 switch (vc_data.type) {
/* vc_get_nflags()
 * syscall handler: report the context's flag word and the mask of
 * flags userspace may still change (one-time flags are masked out). */
709 int vc_get_nflags(uint32_t id, void __user *data)
712 struct vcmd_net_flags_v0 vc_data;
714 if (!capable(CAP_SYS_ADMIN))
717 nxi = lookup_nx_info(id);
721 vc_data.flagword = nxi->nx_flags;
723 /* special STATE flag handling */
724 vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, NXF_ONE_TIME);
728 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
/* vc_set_nflags()
 * syscall handler: update the context's flag word under the caller's
 * mask; one-time flags already set cannot be changed again. A change
 * of NXF_PERSISTENT triggers a persistence-claim re-sync. */
733 int vc_set_nflags(uint32_t id, void __user *data)
736 struct vcmd_net_flags_v0 vc_data;
737 uint64_t mask, trigger;
739 if (!capable(CAP_SYS_ADMIN))
741 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
744 nxi = lookup_nx_info(id);
748 /* special STATE flag handling */
749 mask = vx_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME);
/* 'trigger' holds the bits that actually change under the mask */
750 trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);
752 nxi->nx_flags = vx_mask_flags(nxi->nx_flags,
753 vc_data.flagword, mask);
754 if (trigger & NXF_PERSISTENT)
755 nx_update_persistent(nxi);
/* vc_get_ncaps()
 * syscall handler: report the context's network capability set; all
 * capability bits are reported as changeable (cmask = ~0UL). */
761 int vc_get_ncaps(uint32_t id, void __user *data)
764 struct vcmd_net_caps_v0 vc_data;
766 if (!capable(CAP_SYS_ADMIN))
769 nxi = lookup_nx_info(id);
773 vc_data.ncaps = nxi->nx_ncaps;
774 vc_data.cmask = ~0UL;
777 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
/* vc_set_ncaps()
 * syscall handler: update the context's network capabilities under
 * the caller-supplied change mask. */
782 int vc_set_ncaps(uint32_t id, void __user *data)
785 struct vcmd_net_caps_v0 vc_data;
787 if (!capable(CAP_SYS_ADMIN))
789 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
792 nxi = lookup_nx_info(id);
796 nxi->nx_ncaps = vx_mask_flags(nxi->nx_ncaps,
797 vc_data.ncaps, vc_data.cmask);
803 #include <linux/module.h>
/* exported for GPL modules that manage network context lifetimes */
805 EXPORT_SYMBOL_GPL(free_nx_info);
806 EXPORT_SYMBOL_GPL(unhash_nx_info);