/*
 *  linux/kernel/vserver/network.c
 *
 *  Virtual Server: Network Support
 *
 *  Copyright (C) 2003-2006  Herbert Pötzl
 *
 *  V0.01  broken out from vcontext V0.05
 *  V0.02  cleaned up implementation
 *  V0.03  added equiv nx commands
 *  V0.04  switch to RCU based hash
 *  V0.05  and back to locking again
 *  V0.06  have __create claim() the nxi
 *
 */
17 #include <linux/slab.h>
18 #include <linux/rcupdate.h>
20 #include <linux/vserver/network_cmd.h>
22 #include <asm/errno.h>
27 * allocate an initialized nx_info struct
28 * doesn't make it visible (hash) */
30 static struct nx_info *__alloc_nx_info(nid_t nid)
32 struct nx_info *new = NULL;
34 vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);
36 /* would this benefit from a slab cache? */
37 new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
41 memset (new, 0, sizeof(struct nx_info));
43 INIT_HLIST_NODE(&new->nx_hlist);
44 atomic_set(&new->nx_usecnt, 0);
45 atomic_set(&new->nx_tasks, 0);
48 new->nx_flags = NXF_INIT_SET;
50 /* rest of init goes here */
52 vxdprintk(VXD_CBIT(nid, 0),
53 "alloc_nx_info(%d) = %p", nid, new);
57 /* __dealloc_nx_info()
59 * final disposal of nx_info */
61 static void __dealloc_nx_info(struct nx_info *nxi)
63 vxdprintk(VXD_CBIT(nid, 0),
64 "dealloc_nx_info(%p)", nxi);
66 nxi->nx_hlist.next = LIST_POISON1;
69 BUG_ON(atomic_read(&nxi->nx_usecnt));
70 BUG_ON(atomic_read(&nxi->nx_tasks));
72 nxi->nx_state |= NXS_RELEASED;
76 static void __shutdown_nx_info(struct nx_info *nxi)
78 nxi->nx_state |= NXS_SHUTDOWN;
79 vs_net_change(nxi, VSC_NETDOWN);
84 void free_nx_info(struct nx_info *nxi)
86 /* context shutdown is mandatory */
87 BUG_ON(nxi->nx_state != NXS_SHUTDOWN);
89 /* context must not be hashed */
90 BUG_ON(nxi->nx_state & NXS_HASHED);
92 BUG_ON(atomic_read(&nxi->nx_usecnt));
93 BUG_ON(atomic_read(&nxi->nx_tasks));
95 __dealloc_nx_info(nxi);
99 /* hash table for nx_info hash */
101 #define NX_HASH_SIZE 13
103 struct hlist_head nx_info_hash[NX_HASH_SIZE];
105 static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED;
108 static inline unsigned int __hashval(nid_t nid)
110 return (nid % NX_HASH_SIZE);
117 * add the nxi to the global hash table
118 * requires the hash_lock to be held */
120 static inline void __hash_nx_info(struct nx_info *nxi)
122 struct hlist_head *head;
124 vxd_assert_lock(&nx_info_hash_lock);
125 vxdprintk(VXD_CBIT(nid, 4),
126 "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);
128 /* context must not be hashed */
129 BUG_ON(nx_info_state(nxi, NXS_HASHED));
131 nxi->nx_state |= NXS_HASHED;
132 head = &nx_info_hash[__hashval(nxi->nx_id)];
133 hlist_add_head(&nxi->nx_hlist, head);
136 /* __unhash_nx_info()
138 * remove the nxi from the global hash table
139 * requires the hash_lock to be held */
141 static inline void __unhash_nx_info(struct nx_info *nxi)
143 vxdprintk(VXD_CBIT(nid, 4),
144 "__unhash_nx_info: %p[#%d]", nxi, nxi->nx_id);
146 spin_lock(&nx_info_hash_lock);
147 /* context must be hashed */
148 BUG_ON(!nx_info_state(nxi, NXS_HASHED));
150 nxi->nx_state &= ~NXS_HASHED;
151 hlist_del(&nxi->nx_hlist);
152 spin_unlock(&nx_info_hash_lock);
156 /* __lookup_nx_info()
158 * requires the hash_lock to be held
159 * doesn't increment the nx_refcnt */
161 static inline struct nx_info *__lookup_nx_info(nid_t nid)
163 struct hlist_head *head = &nx_info_hash[__hashval(nid)];
164 struct hlist_node *pos;
167 vxd_assert_lock(&nx_info_hash_lock);
168 hlist_for_each(pos, head) {
169 nxi = hlist_entry(pos, struct nx_info, nx_hlist);
171 if (nxi->nx_id == nid)
176 vxdprintk(VXD_CBIT(nid, 0),
177 "__lookup_nx_info(#%u): %p[#%u]",
178 nid, nxi, nxi?nxi->nx_id:0);
185 * find unused dynamic nid
186 * requires the hash_lock to be held */
188 static inline nid_t __nx_dynamic_id(void)
190 static nid_t seq = MAX_N_CONTEXT;
193 vxd_assert_lock(&nx_info_hash_lock);
195 if (++seq > MAX_N_CONTEXT)
197 if (!__lookup_nx_info(seq)) {
198 vxdprintk(VXD_CBIT(nid, 4),
199 "__nx_dynamic_id: [#%d]", seq);
202 } while (barrier != seq);
206 /* __create_nx_info()
208 * create the requested context
209 * get(), claim() and hash it */
211 static struct nx_info * __create_nx_info(int id)
213 struct nx_info *new, *nxi = NULL;
215 vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id);
217 if (!(new = __alloc_nx_info(id)))
218 return ERR_PTR(-ENOMEM);
220 /* required to make dynamic xids unique */
221 spin_lock(&nx_info_hash_lock);
223 /* dynamic context requested */
224 if (id == NX_DYNAMIC_ID) {
225 id = __nx_dynamic_id();
227 printk(KERN_ERR "no dynamic context available.\n");
228 nxi = ERR_PTR(-EAGAIN);
233 /* static context requested */
234 else if ((nxi = __lookup_nx_info(id))) {
235 vxdprintk(VXD_CBIT(nid, 0),
236 "create_nx_info(%d) = %p (already there)", id, nxi);
237 if (nx_info_flags(nxi, NXF_STATE_SETUP, 0))
238 nxi = ERR_PTR(-EBUSY);
240 nxi = ERR_PTR(-EEXIST);
243 /* dynamic nid creation blocker */
244 else if (id >= MIN_D_CONTEXT) {
245 vxdprintk(VXD_CBIT(nid, 0),
246 "create_nx_info(%d) (dynamic rejected)", id);
247 nxi = ERR_PTR(-EINVAL);
252 vxdprintk(VXD_CBIT(nid, 0),
253 "create_nx_info(%d) = %p (new)", id, new);
254 claim_nx_info(new, NULL);
255 __hash_nx_info(get_nx_info(new));
256 nxi = new, new = NULL;
259 spin_unlock(&nx_info_hash_lock);
261 __dealloc_nx_info(new);
/* shut a context down and remove it from the global hash */
void unhash_nx_info(struct nx_info *nxi)
{
	__shutdown_nx_info(nxi);
	__unhash_nx_info(nxi);
}
#ifdef CONFIG_VSERVER_LEGACYNET

/* legacy entry point: create a context with a dynamically chosen nid */
struct nx_info *create_nx_info(void)
{
	return __create_nx_info(NX_DYNAMIC_ID);
}

#endif
287 * search for a nx_info and get() it
288 * negative id means current */
290 struct nx_info *lookup_nx_info(int id)
292 struct nx_info *nxi = NULL;
295 nxi = get_nx_info(current->nx_info);
297 spin_lock(&nx_info_hash_lock);
298 nxi = get_nx_info(__lookup_nx_info(id));
299 spin_unlock(&nx_info_hash_lock);
306 * verify that nid is still hashed */
308 int nid_is_hashed(nid_t nid)
312 spin_lock(&nx_info_hash_lock);
313 hashed = (__lookup_nx_info(nid) != NULL);
314 spin_unlock(&nx_info_hash_lock);
#ifdef CONFIG_PROC_FS

/* fill 'nids' with up to 'size' hashed ids, skipping 'index' entries;
   returns the number of ids stored */
int get_nid_list(int index, unsigned int *nids, int size)
{
	int hindex, nr_nids = 0;

	for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
		struct hlist_head *head = &nx_info_hash[hindex];
		struct hlist_node *pos;

		spin_lock(&nx_info_hash_lock);
		hlist_for_each(pos, head) {
			struct nx_info *nxi;

			/* NOTE(review): index-skip logic was lost in the
			   garbled source; reconstructed — verify upstream */
			if (--index > 0)
				continue;

			nxi = hlist_entry(pos, struct nx_info, nx_hlist);
			nids[nr_nids] = nxi->nx_id;
			if (++nr_nids >= size) {
				spin_unlock(&nx_info_hash_lock);
				goto out;
			}
		}
		/* keep the lock time short */
		spin_unlock(&nx_info_hash_lock);
	}
out:
	return nr_nids;
}

#endif
353 * migrate task to new network
354 * gets nxi, puts old_nxi on change
357 int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
359 struct nx_info *old_nxi;
365 vxdprintk(VXD_CBIT(nid, 5),
366 "nx_migrate_task(%p,%p[#%d.%d.%d])",
368 atomic_read(&nxi->nx_usecnt),
369 atomic_read(&nxi->nx_tasks));
371 /* maybe disallow this completely? */
372 old_nxi = task_get_nx_info(p);
378 clr_nx_info(&p->nx_info);
379 claim_nx_info(nxi, p);
380 set_nx_info(&p->nx_info, nxi);
384 vxdprintk(VXD_CBIT(nid, 5),
385 "moved task %p into nxi:%p[#%d]",
389 release_nx_info(old_nxi, p);
392 put_nx_info(old_nxi);
399 #include <linux/netdevice.h>
400 #include <linux/inetdevice.h>
402 int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
408 return addr_in_nx_info(nxi, ifa->ifa_local);
411 int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
413 struct in_device *in_dev;
414 struct in_ifaddr **ifap;
415 struct in_ifaddr *ifa;
421 in_dev = in_dev_get(dev);
425 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
426 ifap = &ifa->ifa_next) {
427 if (addr_in_nx_info(nxi, ifa->ifa_local)) {
438 * check if address is covered by socket
440 * sk: the socket to check against
441 * addr: the address in question (must be != 0)
443 static inline int __addr_in_socket(struct sock *sk, uint32_t addr)
445 struct nx_info *nxi = sk->sk_nx_info;
446 uint32_t saddr = inet_rcv_saddr(sk);
448 vxdprintk(VXD_CBIT(net, 5),
449 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx",
450 sk, VXD_QUAD(addr), nxi, VXD_QUAD(saddr), sk->sk_socket,
451 (sk->sk_socket?sk->sk_socket->flags:0));
454 /* direct address match */
455 return (saddr == addr);
457 /* match against nx_info */
458 return addr_in_nx_info(nxi, addr);
460 /* unrestricted any socket */
466 int nx_addr_conflict(struct nx_info *nxi, uint32_t addr, struct sock *sk)
468 vxdprintk(VXD_CBIT(net, 2),
469 "nx_addr_conflict(%p,%p) %d.%d,%d.%d",
470 nxi, sk, VXD_QUAD(addr));
473 /* check real address */
474 return __addr_in_socket(sk, addr);
476 /* check against nx_info */
477 int i, n = nxi->nbipv4;
480 if (__addr_in_socket(sk, nxi->ipv4[i]))
484 /* check against any */
489 #endif /* CONFIG_INET */
491 void nx_set_persistent(struct nx_info *nxi)
493 vxdprintk(VXD_CBIT(nid, 6),
494 "nx_set_persistent(%p[#%d])", nxi, nxi->nx_id);
497 claim_nx_info(nxi, NULL);
500 void nx_clear_persistent(struct nx_info *nxi)
502 vxdprintk(VXD_CBIT(nid, 6),
503 "nx_clear_persistent(%p[#%d])", nxi, nxi->nx_id);
505 release_nx_info(nxi, NULL);
509 void nx_update_persistent(struct nx_info *nxi)
511 if (nx_info_flags(nxi, NXF_PERSISTENT, 0))
512 nx_set_persistent(nxi);
514 nx_clear_persistent(nxi);
/* vserver syscall commands below here */

/* task nid and nx_info functions */
521 #include <asm/uaccess.h>
524 int vc_task_nid(uint32_t id, void __user *data)
529 struct task_struct *tsk;
531 if (!vx_check(0, VX_ADMIN|VX_WATCH))
534 read_lock(&tasklist_lock);
535 tsk = find_task_by_real_pid(id);
536 nid = (tsk) ? tsk->nid : -ESRCH;
537 read_unlock(&tasklist_lock);
540 nid = nx_current_nid();
545 int vc_nx_info(uint32_t id, void __user *data)
548 struct vcmd_nx_info_v0 vc_data;
550 if (!vx_check(0, VX_ADMIN))
552 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
555 nxi = lookup_nx_info(id);
559 vc_data.nid = nxi->nx_id;
562 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
568 /* network functions */
570 int vc_net_create(uint32_t nid, void __user *data)
572 struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET };
573 struct nx_info *new_nxi;
576 if (!capable(CAP_SYS_ADMIN))
578 if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
581 if ((nid > MAX_S_CONTEXT) && (nid != VX_DYNAMIC_ID))
586 new_nxi = __create_nx_info(nid);
588 return PTR_ERR(new_nxi);
591 new_nxi->nx_flags = vc_data.flagword;
594 if (vs_net_change(new_nxi, VSC_NETUP))
597 ret = nx_migrate_task(current, new_nxi);
601 /* return context id on success */
602 ret = new_nxi->nx_id;
604 /* get a reference for persistent contexts */
605 if ((vc_data.flagword & NXF_PERSISTENT))
606 nx_set_persistent(new_nxi);
608 release_nx_info(new_nxi, NULL);
609 put_nx_info(new_nxi);
614 int vc_net_migrate(uint32_t id, void __user *data)
618 if (!capable(CAP_SYS_ADMIN))
621 nxi = lookup_nx_info(id);
624 nx_migrate_task(current, nxi);
629 int vc_net_add(uint32_t nid, void __user *data)
631 struct vcmd_net_addr_v0 vc_data;
633 int index, pos, ret = 0;
635 if (!capable(CAP_SYS_ADMIN))
637 if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
640 switch (vc_data.type) {
642 if ((vc_data.count < 1) || (vc_data.count > 4))
650 nxi = lookup_nx_info(nid);
654 switch (vc_data.type) {
657 while ((index < vc_data.count) &&
658 ((pos = nxi->nbipv4) < NB_IPV4ROOT)) {
659 nxi->ipv4[pos] = vc_data.ip[index];
660 nxi->mask[pos] = vc_data.mask[index];
667 case NXA_TYPE_IPV4|NXA_MOD_BCAST:
668 nxi->v4_bcast = vc_data.ip[0];
681 int vc_net_remove(uint32_t nid, void __user *data)
683 struct vcmd_net_addr_v0 vc_data;
687 if (!capable(CAP_SYS_ADMIN))
689 if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
692 nxi = lookup_nx_info(nid);
696 switch (vc_data.type) {
710 int vc_get_nflags(uint32_t id, void __user *data)
713 struct vcmd_net_flags_v0 vc_data;
715 if (!capable(CAP_SYS_ADMIN))
718 nxi = lookup_nx_info(id);
722 vc_data.flagword = nxi->nx_flags;
724 /* special STATE flag handling */
725 vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, NXF_ONE_TIME);
729 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
734 int vc_set_nflags(uint32_t id, void __user *data)
737 struct vcmd_net_flags_v0 vc_data;
738 uint64_t mask, trigger;
740 if (!capable(CAP_SYS_ADMIN))
742 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
745 nxi = lookup_nx_info(id);
749 /* special STATE flag handling */
750 mask = vx_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME);
751 trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);
753 nxi->nx_flags = vx_mask_flags(nxi->nx_flags,
754 vc_data.flagword, mask);
755 if (trigger & NXF_PERSISTENT)
756 nx_update_persistent(nxi);
762 int vc_get_ncaps(uint32_t id, void __user *data)
765 struct vcmd_net_caps_v0 vc_data;
767 if (!capable(CAP_SYS_ADMIN))
770 nxi = lookup_nx_info(id);
774 vc_data.ncaps = nxi->nx_ncaps;
775 vc_data.cmask = ~0UL;
778 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
783 int vc_set_ncaps(uint32_t id, void __user *data)
786 struct vcmd_net_caps_v0 vc_data;
788 if (!capable(CAP_SYS_ADMIN))
790 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
793 nxi = lookup_nx_info(id);
797 nxi->nx_ncaps = vx_mask_flags(nxi->nx_ncaps,
798 vc_data.ncaps, vc_data.cmask);
804 #include <linux/module.h>
806 EXPORT_SYMBOL_GPL(free_nx_info);
807 EXPORT_SYMBOL_GPL(unhash_nx_info);