Merge to kernel-2.6.20-1.2949.fc6.vs2.2.0.1
[linux-2.6.git] / kernel / vserver / network.c
1 /*
2  *  linux/kernel/vserver/network.c
3  *
4  *  Virtual Server: Network Support
5  *
6  *  Copyright (C) 2003-2007  Herbert Pötzl
7  *
8  *  V0.01  broken out from vcontext V0.05
9  *  V0.02  cleaned up implementation
10  *  V0.03  added equiv nx commands
11  *  V0.04  switch to RCU based hash
12  *  V0.05  and back to locking again
13  *  V0.06  changed vcmds to nxi arg
14  *  V0.07  have __create claim() the nxi
15  *
16  */
17
18 #include <linux/slab.h>
19 #include <linux/rcupdate.h>
20 #include <net/tcp.h>
21 #include <linux/vserver/network_cmd.h>
22
23 #include <asm/errno.h>
24 #include <linux/vserver/base.h>
25 #include <linux/vserver/network_cmd.h>
26
27
/* global accounting: ctotal counts contexts currently allocated
 * (inc on alloc, dec on dealloc), cactive counts contexts
 * currently in the hash (inc on hash, dec on unhash) */
atomic_t nx_global_ctotal	= ATOMIC_INIT(0);
atomic_t nx_global_cactive	= ATOMIC_INIT(0);
30
31
32 /*      __alloc_nx_info()
33
34         * allocate an initialized nx_info struct
35         * doesn't make it visible (hash)                        */
36
37 static struct nx_info *__alloc_nx_info(nid_t nid)
38 {
39         struct nx_info *new = NULL;
40
41         vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);
42
43         /* would this benefit from a slab cache? */
44         new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
45         if (!new)
46                 return 0;
47
48         memset (new, 0, sizeof(struct nx_info));
49         new->nx_id = nid;
50         INIT_HLIST_NODE(&new->nx_hlist);
51         atomic_set(&new->nx_usecnt, 0);
52         atomic_set(&new->nx_tasks, 0);
53         new->nx_state = 0;
54
55         new->nx_flags = NXF_INIT_SET;
56
57         /* rest of init goes here */
58
59         vxdprintk(VXD_CBIT(nid, 0),
60                 "alloc_nx_info(%d) = %p", nid, new);
61         atomic_inc(&nx_global_ctotal);
62         return new;
63 }
64
/*	__dealloc_nx_info()

	* final disposal of nx_info				*/

static void __dealloc_nx_info(struct nx_info *nxi)
{
	vxdprintk(VXD_CBIT(nid, 0),
		"dealloc_nx_info(%p)", nxi);

	/* poison the list pointer and id so any stale reference
	   to this context faults/fails fast */
	nxi->nx_hlist.next = LIST_POISON1;
	nxi->nx_id = -1;

	/* must not still be referenced or carry tasks */
	BUG_ON(atomic_read(&nxi->nx_usecnt));
	BUG_ON(atomic_read(&nxi->nx_tasks));

	nxi->nx_state |= NXS_RELEASED;
	kfree(nxi);
	atomic_dec(&nx_global_ctotal);
}
84
/* mark the context as shut down and broadcast the
 * network-down state change */
static void __shutdown_nx_info(struct nx_info *nxi)
{
	nxi->nx_state |= NXS_SHUTDOWN;
	vs_net_change(nxi, VSC_NETDOWN);
}
90
/*	exported stuff						*/

/* final release of a context: caller guarantees it was shut
 * down, unhashed and is no longer referenced */
void free_nx_info(struct nx_info *nxi)
{
	/* context shutdown is mandatory */
	BUG_ON(nxi->nx_state != NXS_SHUTDOWN);

	/* context must not be hashed */
	BUG_ON(nxi->nx_state & NXS_HASHED);

	BUG_ON(atomic_read(&nxi->nx_usecnt));
	BUG_ON(atomic_read(&nxi->nx_tasks));

	__dealloc_nx_info(nxi);
}
106
107
108 /*      hash table for nx_info hash */
109
110 #define NX_HASH_SIZE    13
111
112 struct hlist_head nx_info_hash[NX_HASH_SIZE];
113
114 static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED;
115
116
117 static inline unsigned int __hashval(nid_t nid)
118 {
119         return (nid % NX_HASH_SIZE);
120 }
121
122
123
/*	__hash_nx_info()

	* add the nxi to the global hash table
	* requires the hash_lock to be held                     */

static inline void __hash_nx_info(struct nx_info *nxi)
{
	struct hlist_head *head;

	vxd_assert_lock(&nx_info_hash_lock);
	vxdprintk(VXD_CBIT(nid, 4),
		"__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);

	/* context must not be hashed */
	BUG_ON(nx_info_state(nxi, NXS_HASHED));

	/* set the state bit before linking so anyone holding the
	   hash lock always sees a consistent pair */
	nxi->nx_state |= NXS_HASHED;
	head = &nx_info_hash[__hashval(nxi->nx_id)];
	hlist_add_head(&nxi->nx_hlist, head);
	atomic_inc(&nx_global_cactive);
}
145
/*	__unhash_nx_info()

	* remove the nxi from the global hash table
	* requires the hash_lock to be held                     */

static inline void __unhash_nx_info(struct nx_info *nxi)
{
	vxd_assert_lock(&nx_info_hash_lock);
	vxdprintk(VXD_CBIT(nid, 4),
		"__unhash_nx_info: %p[#%d.%d.%d]", nxi, nxi->nx_id,
		atomic_read(&nxi->nx_usecnt), atomic_read(&nxi->nx_tasks));

	/* context must be hashed */
	BUG_ON(!nx_info_state(nxi, NXS_HASHED));
	/* but without tasks */
	BUG_ON(atomic_read(&nxi->nx_tasks));

	/* clear the state bit before unlinking (mirror of
	   __hash_nx_info) so the pair stays consistent */
	nxi->nx_state &= ~NXS_HASHED;
	hlist_del(&nxi->nx_hlist);
	atomic_dec(&nx_global_cactive);
}
167
168
169 /*      __lookup_nx_info()
170
171         * requires the hash_lock to be held
172         * doesn't increment the nx_refcnt                       */
173
174 static inline struct nx_info *__lookup_nx_info(nid_t nid)
175 {
176         struct hlist_head *head = &nx_info_hash[__hashval(nid)];
177         struct hlist_node *pos;
178         struct nx_info *nxi;
179
180         vxd_assert_lock(&nx_info_hash_lock);
181         hlist_for_each(pos, head) {
182                 nxi = hlist_entry(pos, struct nx_info, nx_hlist);
183
184                 if (nxi->nx_id == nid)
185                         goto found;
186         }
187         nxi = NULL;
188 found:
189         vxdprintk(VXD_CBIT(nid, 0),
190                 "__lookup_nx_info(#%u): %p[#%u]",
191                 nid, nxi, nxi?nxi->nx_id:0);
192         return nxi;
193 }
194
195
/*	__nx_dynamic_id()

	* find unused dynamic nid
	* requires the hash_lock to be held                     */

static inline nid_t __nx_dynamic_id(void)
{
	/* seq persists across calls: search resumes after the
	   last id handed out */
	static nid_t seq = MAX_N_CONTEXT;
	nid_t barrier = seq;

	vxd_assert_lock(&nx_info_hash_lock);
	do {
		/* wrap back to the start of the dynamic range */
		if (++seq > MAX_N_CONTEXT)
			seq = MIN_D_CONTEXT;
		if (!__lookup_nx_info(seq)) {
			vxdprintk(VXD_CBIT(nid, 4),
				"__nx_dynamic_id: [#%d]", seq);
			return seq;
		}
	} while (barrier != seq);
	/* full cycle without a free id: none available */
	return 0;
}
218
/*	__create_nx_info()

	* create the requested context
	* get(), claim() and hash it                            */

static struct nx_info * __create_nx_info(int id)
{
	struct nx_info *new, *nxi = NULL;

	vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id);

	if (!(new = __alloc_nx_info(id)))
		return ERR_PTR(-ENOMEM);

	/* required to make dynamic xids unique */
	spin_lock(&nx_info_hash_lock);

	/* dynamic context requested */
	if (id == NX_DYNAMIC_ID) {
#ifdef	CONFIG_VSERVER_DYNAMIC_IDS
		id = __nx_dynamic_id();
		if (!id) {
			printk(KERN_ERR "no dynamic context available.\n");
			nxi = ERR_PTR(-EAGAIN);
			goto out_unlock;
		}
		new->nx_id = id;
#else
		printk(KERN_ERR "dynamic contexts disabled.\n");
		nxi = ERR_PTR(-EINVAL);
		goto out_unlock;
#endif
	}
	/* static context requested */
	else if ((nxi = __lookup_nx_info(id))) {
		vxdprintk(VXD_CBIT(nid, 0),
			"create_nx_info(%d) = %p (already there)", id, nxi);
		/* still in setup: busy; otherwise: already exists */
		if (nx_info_flags(nxi, NXF_STATE_SETUP, 0))
			nxi = ERR_PTR(-EBUSY);
		else
			nxi = ERR_PTR(-EEXIST);
		goto out_unlock;
	}
	/* dynamic nid creation blocker */
	else if (id >= MIN_D_CONTEXT) {
		vxdprintk(VXD_CBIT(nid, 0),
			"create_nx_info(%d) (dynamic rejected)", id);
		nxi = ERR_PTR(-EINVAL);
		goto out_unlock;
	}

	/* new context */
	vxdprintk(VXD_CBIT(nid, 0),
		"create_nx_info(%d) = %p (new)", id, new);
	/* hand the context out claimed and with a reference held;
	   the caller is responsible for release/put */
	claim_nx_info(new, NULL);
	__hash_nx_info(get_nx_info(new));
	nxi = new, new = NULL;

out_unlock:
	spin_unlock(&nx_info_hash_lock);
	/* on error paths 'new' is still set and gets disposed here */
	if (new)
		__dealloc_nx_info(new);
	return nxi;
}
283
284
285
286 /*      exported stuff                                          */
287
288
/* shut the context down, then take it out of the hash
 * under the hash lock */
void unhash_nx_info(struct nx_info *nxi)
{
	__shutdown_nx_info(nxi);
	spin_lock(&nx_info_hash_lock);
	__unhash_nx_info(nxi);
	spin_unlock(&nx_info_hash_lock);
}
296
297 #ifdef  CONFIG_VSERVER_LEGACYNET
298
/* legacy entry point: create a fresh dynamic network context */
struct nx_info *create_nx_info(void)
{
	return __create_nx_info(NX_DYNAMIC_ID);
}
303
304 #endif
305
306 /*      lookup_nx_info()
307
308         * search for a nx_info and get() it
309         * negative id means current                             */
310
311 struct nx_info *lookup_nx_info(int id)
312 {
313         struct nx_info *nxi = NULL;
314
315         if (id < 0) {
316                 nxi = get_nx_info(current->nx_info);
317         } else if (id > 1) {
318                 spin_lock(&nx_info_hash_lock);
319                 nxi = get_nx_info(__lookup_nx_info(id));
320                 spin_unlock(&nx_info_hash_lock);
321         }
322         return nxi;
323 }
324
325 /*      nid_is_hashed()
326
327         * verify that nid is still hashed                       */
328
int nid_is_hashed(nid_t nid)
{
	int hashed;

	/* lookup must happen under the hash lock */
	spin_lock(&nx_info_hash_lock);
	hashed = (__lookup_nx_info(nid) != NULL);
	spin_unlock(&nx_info_hash_lock);
	return hashed;
}
338
339
340 #ifdef  CONFIG_PROC_FS
341
342 /*      get_nid_list()
343
344         * get a subset of hashed nids for proc
345         * assumes size is at least one                          */
346
int get_nid_list(int index, unsigned int *nids, int size)
{
	int hindex, nr_nids = 0;

	/* only show current and children */
	if (!nx_check(0, VS_ADMIN|VS_WATCH)) {
		if (index > 0)
			return 0;
		nids[nr_nids] = nx_current_nid();
		return 1;
	}

	for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
		struct hlist_head *head = &nx_info_hash[hindex];
		struct hlist_node *pos;

		spin_lock(&nx_info_hash_lock);
		hlist_for_each(pos, head) {
			struct nx_info *nxi;

			/* skip the first 'index' entries overall
			   (counter is shared across buckets) */
			if (--index > 0)
				continue;

			nxi = hlist_entry(pos, struct nx_info, nx_hlist);
			nids[nr_nids] = nxi->nx_id;
			/* output buffer full: stop early, lock dropped */
			if (++nr_nids >= size) {
				spin_unlock(&nx_info_hash_lock);
				goto out;
			}
		}
		/* keep the lock time short */
		spin_unlock(&nx_info_hash_lock);
	}
out:
	return nr_nids;
}
383 #endif
384
385
386 /*
387  *      migrate task to new network
388  *      gets nxi, puts old_nxi on change
389  */
390
391 int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
392 {
393         struct nx_info *old_nxi;
394         int ret = 0;
395
396         if (!p || !nxi)
397                 BUG();
398
399         vxdprintk(VXD_CBIT(nid, 5),
400                 "nx_migrate_task(%p,%p[#%d.%d.%d])",
401                 p, nxi, nxi->nx_id,
402                 atomic_read(&nxi->nx_usecnt),
403                 atomic_read(&nxi->nx_tasks));
404
405         if (nx_info_flags(nxi, NXF_INFO_PRIVATE, 0) &&
406                 !nx_info_flags(nxi, NXF_STATE_SETUP, 0))
407                 return -EACCES;
408
409         if (nx_info_state(nxi, NXS_SHUTDOWN))
410                 return -EFAULT;
411
412         /* maybe disallow this completely? */
413         old_nxi = task_get_nx_info(p);
414         if (old_nxi == nxi)
415                 goto out;
416
417         task_lock(p);
418         if (old_nxi)
419                 clr_nx_info(&p->nx_info);
420         claim_nx_info(nxi, p);
421         set_nx_info(&p->nx_info, nxi);
422         p->nid = nxi->nx_id;
423         task_unlock(p);
424
425         vxdprintk(VXD_CBIT(nid, 5),
426                 "moved task %p into nxi:%p[#%d]",
427                 p, nxi, nxi->nx_id);
428
429         if (old_nxi)
430                 release_nx_info(old_nxi, p);
431         ret = 0;
432 out:
433         put_nx_info(old_nxi);
434         return ret;
435 }
436
437
438 #ifdef CONFIG_INET
439
440 #include <linux/netdevice.h>
441 #include <linux/inetdevice.h>
442
443 int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
444 {
445         if (!nxi)
446                 return 1;
447         if (!ifa)
448                 return 0;
449         return addr_in_nx_info(nxi, ifa->ifa_local);
450 }
451
452 int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
453 {
454         struct in_device *in_dev;
455         struct in_ifaddr **ifap;
456         struct in_ifaddr *ifa;
457         int ret = 0;
458
459         if (!nxi)
460                 return 1;
461
462         if (!dev)
463                 goto out;
464         in_dev = in_dev_get(dev);
465         if (!in_dev)
466                 goto out;
467
468         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
469                 ifap = &ifa->ifa_next) {
470                 if (addr_in_nx_info(nxi, ifa->ifa_local)) {
471                         ret = 1;
472                         break;
473                 }
474         }
475         in_dev_put(in_dev);
476 out:
477         return ret;
478 }
479
/*
 *	check if address is covered by socket
 *
 *	sk:	the socket to check against
 *	addr:	the address in question (must be != 0)
 */
static inline int __addr_in_socket(const struct sock *sk, uint32_t addr)
{
	/* the socket's network context, may be NULL */
	struct nx_info *nxi = sk->sk_nx_info;
	/* bound receive address; 0 means "any" */
	uint32_t saddr = inet_rcv_saddr(sk);

	vxdprintk(VXD_CBIT(net, 5),
		"__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx",
		sk, VXD_QUAD(addr), nxi, VXD_QUAD(saddr), sk->sk_socket,
		(sk->sk_socket?sk->sk_socket->flags:0));

	if (saddr) {
		/* direct address match */
		return (saddr == addr);
	} else if (nxi) {
		/* match against nx_info */
		return addr_in_nx_info(nxi, addr);
	} else {
		/* unrestricted any socket */
		return 1;
	}
}
507
508
509 int nx_addr_conflict(struct nx_info *nxi, uint32_t addr, const struct sock *sk)
510 {
511         vxdprintk(VXD_CBIT(net, 2),
512                 "nx_addr_conflict(%p,%p) %d.%d,%d.%d",
513                 nxi, sk, VXD_QUAD(addr));
514
515         if (addr) {
516                 /* check real address */
517                 return __addr_in_socket(sk, addr);
518         } else if (nxi) {
519                 /* check against nx_info */
520                 int i, n = nxi->nbipv4;
521
522                 for (i=0; i<n; i++)
523                         if (__addr_in_socket(sk, nxi->ipv4[i]))
524                                 return 1;
525                 return 0;
526         } else {
527                 /* check against any */
528                 return 1;
529         }
530 }
531
532 #endif /* CONFIG_INET */
533
/* take the extra reference and claim that keep a persistent
 * context alive even without tasks */
void nx_set_persistent(struct nx_info *nxi)
{
	vxdprintk(VXD_CBIT(nid, 6),
		"nx_set_persistent(%p[#%d])", nxi, nxi->nx_id);

	get_nx_info(nxi);
	claim_nx_info(nxi, NULL);
}
542
/* drop the claim and reference taken by nx_set_persistent() */
void nx_clear_persistent(struct nx_info *nxi)
{
	vxdprintk(VXD_CBIT(nid, 6),
		"nx_clear_persistent(%p[#%d])", nxi, nxi->nx_id);

	release_nx_info(nxi, NULL);
	put_nx_info(nxi);
}
551
/* sync the persistent reference with the current state of
 * the NXF_PERSISTENT flag */
void nx_update_persistent(struct nx_info *nxi)
{
	if (nx_info_flags(nxi, NXF_PERSISTENT, 0))
		nx_set_persistent(nxi);
	else
		nx_clear_persistent(nxi);
}
559
560 /* vserver syscall commands below here */
561
562 /* taks nid and nx_info functions */
563
564 #include <asm/uaccess.h>
565
566
567 int vc_task_nid(uint32_t id, void __user *data)
568 {
569         nid_t nid;
570
571         if (id) {
572                 struct task_struct *tsk;
573
574                 if (!nx_check(0, VS_ADMIN|VS_WATCH))
575                         return -EPERM;
576
577                 read_lock(&tasklist_lock);
578                 tsk = find_task_by_real_pid(id);
579                 nid = (tsk) ? tsk->nid : -ESRCH;
580                 read_unlock(&tasklist_lock);
581         }
582         else
583                 nid = nx_current_nid();
584         return nid;
585 }
586
587
588 int vc_nx_info(struct nx_info *nxi, void __user *data)
589 {
590         struct vcmd_nx_info_v0 vc_data;
591
592         vc_data.nid = nxi->nx_id;
593
594         if (copy_to_user (data, &vc_data, sizeof(vc_data)))
595                 return -EFAULT;
596         return 0;
597 }
598
599
600 /* network functions */
601
int vc_net_create(uint32_t nid, void __user *data)
{
	/* data is optional; defaults to the initial flag set */
	struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET };
	struct nx_info *new_nxi;
	int ret;

	if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	/* only static ids up to MAX_S_CONTEXT or the dynamic
	   marker are acceptable */
	if ((nid > MAX_S_CONTEXT) && (nid != NX_DYNAMIC_ID))
		return -EINVAL;
	/* ids 0 and 1 are reserved */
	if (nid < 2)
		return -EINVAL;

	/* comes back claimed and with a reference held */
	new_nxi = __create_nx_info(nid);
	if (IS_ERR(new_nxi))
		return PTR_ERR(new_nxi);

	/* initial flags */
	new_nxi->nx_flags = vc_data.flagword;

	ret = -ENOEXEC;
	if (vs_net_change(new_nxi, VSC_NETUP))
		goto out;

	ret = nx_migrate_task(current, new_nxi);
	if (ret)
		goto out;

	/* return context id on success */
	ret = new_nxi->nx_id;

	/* get a reference for persistent contexts */
	if ((vc_data.flagword & NXF_PERSISTENT))
		nx_set_persistent(new_nxi);
out:
	/* drop the claim and reference from __create_nx_info() */
	release_nx_info(new_nxi, NULL);
	put_nx_info(new_nxi);
	return ret;
}
642
643
/* move the calling task into the given network context */
int vc_net_migrate(struct nx_info *nxi, void __user *data)
{
	return nx_migrate_task(current, nxi);
}
648
649 int vc_net_add(struct nx_info *nxi, void __user *data)
650 {
651         struct vcmd_net_addr_v0 vc_data;
652         int index, pos, ret = 0;
653
654         if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
655                 return -EFAULT;
656
657         switch (vc_data.type) {
658         case NXA_TYPE_IPV4:
659                 if ((vc_data.count < 1) || (vc_data.count > 4))
660                         return -EINVAL;
661                 break;
662
663         default:
664                 break;
665         }
666
667         switch (vc_data.type) {
668         case NXA_TYPE_IPV4:
669                 index = 0;
670                 while ((index < vc_data.count) &&
671                         ((pos = nxi->nbipv4) < NB_IPV4ROOT)) {
672                         nxi->ipv4[pos] = vc_data.ip[index];
673                         nxi->mask[pos] = vc_data.mask[index];
674                         index++;
675                         nxi->nbipv4++;
676                 }
677                 ret = index;
678                 break;
679
680         case NXA_TYPE_IPV4|NXA_MOD_BCAST:
681                 nxi->v4_bcast = vc_data.ip[0];
682                 ret = 1;
683                 break;
684
685         default:
686                 ret = -EINVAL;
687                 break;
688         }
689         return ret;
690 }
691
692 int vc_net_remove(struct nx_info * nxi, void __user *data)
693 {
694         struct vcmd_net_addr_v0 vc_data;
695
696         if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
697                 return -EFAULT;
698
699         switch (vc_data.type) {
700         case NXA_TYPE_ANY:
701                 nxi->nbipv4 = 0;
702                 break;
703
704         default:
705                 return -EINVAL;
706         }
707         return 0;
708 }
709
710 int vc_get_nflags(struct nx_info *nxi, void __user *data)
711 {
712         struct vcmd_net_flags_v0 vc_data;
713
714         vc_data.flagword = nxi->nx_flags;
715
716         /* special STATE flag handling */
717         vc_data.mask = vs_mask_flags(~0UL, nxi->nx_flags, NXF_ONE_TIME);
718
719         if (copy_to_user (data, &vc_data, sizeof(vc_data)))
720                 return -EFAULT;
721         return 0;
722 }
723
int vc_set_nflags(struct nx_info *nxi, void __user *data)
{
	struct vcmd_net_flags_v0 vc_data;
	uint64_t mask, trigger;

	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	/* special STATE flag handling */
	mask = vs_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME);
	/* trigger: flags that actually change with this update */
	trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);

	nxi->nx_flags = vs_mask_flags(nxi->nx_flags,
		vc_data.flagword, mask);
	/* flipping NXF_PERSISTENT adjusts the context reference */
	if (trigger & NXF_PERSISTENT)
		nx_update_persistent(nxi);

	return 0;
}
743
744 int vc_get_ncaps(struct nx_info *nxi, void __user *data)
745 {
746         struct vcmd_net_caps_v0 vc_data;
747
748         vc_data.ncaps = nxi->nx_ncaps;
749         vc_data.cmask = ~0UL;
750
751         if (copy_to_user (data, &vc_data, sizeof(vc_data)))
752                 return -EFAULT;
753         return 0;
754 }
755
int vc_set_ncaps(struct nx_info *nxi, void __user *data)
{
	struct vcmd_net_caps_v0 vc_data;

	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	/* update only the capability bits selected by cmask */
	nxi->nx_ncaps = vs_mask_flags(nxi->nx_ncaps,
		vc_data.ncaps, vc_data.cmask);
	return 0;
}
767
768
769 #include <linux/module.h>
770
771 EXPORT_SYMBOL_GPL(free_nx_info);
772 EXPORT_SYMBOL_GPL(unhash_nx_info);
773