fedora core 6 1.2949 + vserver 2.2.0
/*
 *  linux/kernel/vserver/network.c
 *
 *  Virtual Server: Network Support
 *
 *  Copyright (C) 2003-2007  Herbert Pötzl
 *
 *  V0.01  broken out from vcontext V0.05
 *  V0.02  cleaned up implementation
 *  V0.03  added equiv nx commands
 *  V0.04  switch to RCU based hash
 *  V0.05  and back to locking again
 *  V0.06  changed vcmds to nxi arg
 *  V0.07  have __create claim() the nxi
 *
 */

#include <linux/slab.h>
#include <linux/rcupdate.h>
#include <net/tcp.h>

#include <asm/errno.h>
#include <linux/vserver/base.h>
#include <linux/vserver/network_cmd.h>


atomic_t nx_global_ctotal       = ATOMIC_INIT(0);
atomic_t nx_global_cactive      = ATOMIC_INIT(0);


/*      __alloc_nx_info()

        * allocate an initialized nx_info struct
        * doesn't make it visible (hash)                        */

static struct nx_info *__alloc_nx_info(nid_t nid)
{
        struct nx_info *new = NULL;

        vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);

        /* would this benefit from a slab cache? */
        new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
        if (!new)
                return NULL;

        memset(new, 0, sizeof(struct nx_info));
        new->nx_id = nid;
        INIT_HLIST_NODE(&new->nx_hlist);
        atomic_set(&new->nx_usecnt, 0);
        atomic_set(&new->nx_tasks, 0);
        new->nx_state = 0;

        new->nx_flags = NXF_INIT_SET;

        /* rest of init goes here */

        vxdprintk(VXD_CBIT(nid, 0),
                "alloc_nx_info(%d) = %p", nid, new);
        atomic_inc(&nx_global_ctotal);
        return new;
}

/*      __dealloc_nx_info()

        * final disposal of nx_info                             */

static void __dealloc_nx_info(struct nx_info *nxi)
{
        vxdprintk(VXD_CBIT(nid, 0),
                "dealloc_nx_info(%p)", nxi);

        nxi->nx_hlist.next = LIST_POISON1;
        nxi->nx_id = -1;

        BUG_ON(atomic_read(&nxi->nx_usecnt));
        BUG_ON(atomic_read(&nxi->nx_tasks));

        nxi->nx_state |= NXS_RELEASED;
        kfree(nxi);
        atomic_dec(&nx_global_ctotal);
}

static void __shutdown_nx_info(struct nx_info *nxi)
{
        nxi->nx_state |= NXS_SHUTDOWN;
        vs_net_change(nxi, VSC_NETDOWN);
}

/*      exported stuff                                          */

void free_nx_info(struct nx_info *nxi)
{
        /* context shutdown is mandatory */
        BUG_ON(nxi->nx_state != NXS_SHUTDOWN);

        /* context must not be hashed */
        BUG_ON(nxi->nx_state & NXS_HASHED);

        BUG_ON(atomic_read(&nxi->nx_usecnt));
        BUG_ON(atomic_read(&nxi->nx_tasks));

        __dealloc_nx_info(nxi);
}

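/*
 *      Rough lifecycle, as far as it can be read from this file
 *      (a sketch, not authoritative documentation):
 *
 *        __alloc_nx_info()   allocate, nx_state == 0
 *        __create_nx_info()  claim_nx_info() + get_nx_info() + hash,
 *                            nx_state |= NXS_HASHED
 *        unhash_nx_info()    shutdown (NXS_SHUTDOWN) and unhash
 *        free_nx_info()      final __dealloc_nx_info()/kfree() once
 *                            nx_usecnt and nx_tasks have dropped to zero
 *
 *      nx_usecnt tracks get_nx_info()/put_nx_info() references,
 *      nx_tasks tracks claim_nx_info()/release_nx_info() claims
 *      (attached tasks and persistence); nx_global_ctotal and
 *      nx_global_cactive count allocated and hashed contexts.
 */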

/*      hash table for nx_info hash */

#define NX_HASH_SIZE    13

struct hlist_head nx_info_hash[NX_HASH_SIZE];

static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED;


static inline unsigned int __hashval(nid_t nid)
{
        return (nid % NX_HASH_SIZE);
}


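/*
 *      Note on the hash: NX_HASH_SIZE (13) buckets, indexed by
 *      nid % NX_HASH_SIZE, all protected by the single
 *      nx_info_hash_lock spinlock (the RCU based hash of V0.04 was
 *      reverted in V0.05, see the changelog above).  As a worked
 *      example, nid 42 lands in bucket 42 % 13 == 3.
 */
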
/*      __hash_nx_info()

        * add the nxi to the global hash table
        * requires the hash_lock to be held                     */

static inline void __hash_nx_info(struct nx_info *nxi)
{
        struct hlist_head *head;

        vxd_assert_lock(&nx_info_hash_lock);
        vxdprintk(VXD_CBIT(nid, 4),
                "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);

        /* context must not be hashed */
        BUG_ON(nx_info_state(nxi, NXS_HASHED));

        nxi->nx_state |= NXS_HASHED;
        head = &nx_info_hash[__hashval(nxi->nx_id)];
        hlist_add_head(&nxi->nx_hlist, head);
        atomic_inc(&nx_global_cactive);
}

/*      __unhash_nx_info()

        * remove the nxi from the global hash table
        * requires the hash_lock to be held                     */

static inline void __unhash_nx_info(struct nx_info *nxi)
{
        vxd_assert_lock(&nx_info_hash_lock);
        vxdprintk(VXD_CBIT(nid, 4),
                "__unhash_nx_info: %p[#%d.%d.%d]", nxi, nxi->nx_id,
                atomic_read(&nxi->nx_usecnt), atomic_read(&nxi->nx_tasks));

        /* context must be hashed */
        BUG_ON(!nx_info_state(nxi, NXS_HASHED));
        /* but without tasks */
        BUG_ON(atomic_read(&nxi->nx_tasks));

        nxi->nx_state &= ~NXS_HASHED;
        hlist_del(&nxi->nx_hlist);
        atomic_dec(&nx_global_cactive);
}


/*      __lookup_nx_info()

        * requires the hash_lock to be held
        * doesn't increment the nx_refcnt                       */

static inline struct nx_info *__lookup_nx_info(nid_t nid)
{
        struct hlist_head *head = &nx_info_hash[__hashval(nid)];
        struct hlist_node *pos;
        struct nx_info *nxi;

        vxd_assert_lock(&nx_info_hash_lock);
        hlist_for_each(pos, head) {
                nxi = hlist_entry(pos, struct nx_info, nx_hlist);

                if (nxi->nx_id == nid)
                        goto found;
        }
        nxi = NULL;
found:
        vxdprintk(VXD_CBIT(nid, 0),
                "__lookup_nx_info(#%u): %p[#%u]",
                nid, nxi, nxi?nxi->nx_id:0);
        return nxi;
}


/*      __nx_dynamic_id()

        * find unused dynamic nid
        * requires the hash_lock to be held                     */

static inline nid_t __nx_dynamic_id(void)
{
        static nid_t seq = MAX_N_CONTEXT;
        nid_t barrier = seq;

        vxd_assert_lock(&nx_info_hash_lock);
        do {
                if (++seq > MAX_N_CONTEXT)
                        seq = MIN_D_CONTEXT;
                if (!__lookup_nx_info(seq)) {
                        vxdprintk(VXD_CBIT(nid, 4),
                                "__nx_dynamic_id: [#%d]", seq);
                        return seq;
                }
        } while (barrier != seq);
        return 0;
}

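/*
 *      __nx_dynamic_id() walks the dynamic nid range circularly,
 *      starting just past the last nid it handed out and wrapping from
 *      MAX_N_CONTEXT back to MIN_D_CONTEXT; it returns 0 once it comes
 *      full circle without finding a free nid.  The result is only
 *      race-free because the caller (__create_nx_info() below) hashes
 *      the new context before dropping nx_info_hash_lock.
 */
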
/*      __create_nx_info()

        * create the requested context
        * get(), claim() and hash it                            */

static struct nx_info * __create_nx_info(int id)
{
        struct nx_info *new, *nxi = NULL;

        vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id);

        if (!(new = __alloc_nx_info(id)))
                return ERR_PTR(-ENOMEM);

        /* required to make dynamic nids unique */
        spin_lock(&nx_info_hash_lock);

        /* dynamic context requested */
        if (id == NX_DYNAMIC_ID) {
#ifdef  CONFIG_VSERVER_DYNAMIC_IDS
                id = __nx_dynamic_id();
                if (!id) {
                        printk(KERN_ERR "no dynamic context available.\n");
                        nxi = ERR_PTR(-EAGAIN);
                        goto out_unlock;
                }
                new->nx_id = id;
#else
                printk(KERN_ERR "dynamic contexts disabled.\n");
                nxi = ERR_PTR(-EINVAL);
                goto out_unlock;
#endif
        }
        /* static context requested */
        else if ((nxi = __lookup_nx_info(id))) {
                vxdprintk(VXD_CBIT(nid, 0),
                        "create_nx_info(%d) = %p (already there)", id, nxi);
                if (nx_info_flags(nxi, NXF_STATE_SETUP, 0))
                        nxi = ERR_PTR(-EBUSY);
                else
                        nxi = ERR_PTR(-EEXIST);
                goto out_unlock;
        }
        /* dynamic nid creation blocker */
        else if (id >= MIN_D_CONTEXT) {
                vxdprintk(VXD_CBIT(nid, 0),
                        "create_nx_info(%d) (dynamic rejected)", id);
                nxi = ERR_PTR(-EINVAL);
                goto out_unlock;
        }

        /* new context */
        vxdprintk(VXD_CBIT(nid, 0),
                "create_nx_info(%d) = %p (new)", id, new);
        claim_nx_info(new, NULL);
        __hash_nx_info(get_nx_info(new));
        nxi = new, new = NULL;

out_unlock:
        spin_unlock(&nx_info_hash_lock);
        if (new)
                __dealloc_nx_info(new);
        return nxi;
}

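/*
 *      On success __create_nx_info() hands back a context that is
 *      already hashed, claimed and referenced; on failure it returns
 *      an ERR_PTR() value.  A minimal caller sketch (mirroring what
 *      vc_net_create() does further below):
 *
 *              nxi = __create_nx_info(id);
 *              if (IS_ERR(nxi))
 *                      return PTR_ERR(nxi);
 *              ...
 *              release_nx_info(nxi, NULL);
 *              put_nx_info(nxi);
 */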


/*      exported stuff                                          */


void unhash_nx_info(struct nx_info *nxi)
{
        __shutdown_nx_info(nxi);
        spin_lock(&nx_info_hash_lock);
        __unhash_nx_info(nxi);
        spin_unlock(&nx_info_hash_lock);
}

#ifdef  CONFIG_VSERVER_LEGACYNET

struct nx_info *create_nx_info(void)
{
        return __create_nx_info(NX_DYNAMIC_ID);
}

#endif

/*      lookup_nx_info()

        * search for a nx_info and get() it
        * negative id means current                             */

struct nx_info *lookup_nx_info(int id)
{
        struct nx_info *nxi = NULL;

        if (id < 0) {
                nxi = get_nx_info(current->nx_info);
        } else if (id > 1) {
                spin_lock(&nx_info_hash_lock);
                nxi = get_nx_info(__lookup_nx_info(id));
                spin_unlock(&nx_info_hash_lock);
        }
        return nxi;
}

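/*
 *      A typical caller pattern (sketch only; the error value is up to
 *      the caller): the reference taken via get_nx_info() has to be
 *      dropped again with put_nx_info():
 *
 *              struct nx_info *nxi = lookup_nx_info(nid);
 *
 *              if (!nxi)
 *                      return -ESRCH;
 *              ...
 *              put_nx_info(nxi);
 */
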
/*      nid_is_hashed()

        * verify that nid is still hashed                       */

int nid_is_hashed(nid_t nid)
{
        int hashed;

        spin_lock(&nx_info_hash_lock);
        hashed = (__lookup_nx_info(nid) != NULL);
        spin_unlock(&nx_info_hash_lock);
        return hashed;
}


#ifdef  CONFIG_PROC_FS

/*      get_nid_list()

        * get a subset of hashed nids for proc
        * assumes size is at least one                          */

int get_nid_list(int index, unsigned int *nids, int size)
{
        int hindex, nr_nids = 0;

        /* only show current and children */
        if (!nx_check(0, VS_ADMIN|VS_WATCH)) {
                if (index > 0)
                        return 0;
                nids[nr_nids] = nx_current_nid();
                return 1;
        }

        for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
                struct hlist_head *head = &nx_info_hash[hindex];
                struct hlist_node *pos;

                spin_lock(&nx_info_hash_lock);
                hlist_for_each(pos, head) {
                        struct nx_info *nxi;

                        if (--index > 0)
                                continue;

                        nxi = hlist_entry(pos, struct nx_info, nx_hlist);
                        nids[nr_nids] = nxi->nx_id;
                        if (++nr_nids >= size) {
                                spin_unlock(&nx_info_hash_lock);
                                goto out;
                        }
                }
                /* keep the lock time short */
                spin_unlock(&nx_info_hash_lock);
        }
out:
        return nr_nids;
}
#endif


/*
 *      migrate task to new network
 *      gets nxi, puts old_nxi on change
 */

int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
{
        struct nx_info *old_nxi;
        int ret = 0;

        if (!p || !nxi)
                BUG();

        vxdprintk(VXD_CBIT(nid, 5),
                "nx_migrate_task(%p,%p[#%d.%d.%d])",
                p, nxi, nxi->nx_id,
                atomic_read(&nxi->nx_usecnt),
                atomic_read(&nxi->nx_tasks));

        if (nx_info_flags(nxi, NXF_INFO_PRIVATE, 0) &&
                !nx_info_flags(nxi, NXF_STATE_SETUP, 0))
                return -EACCES;

        if (nx_info_state(nxi, NXS_SHUTDOWN))
                return -EFAULT;

        /* maybe disallow this completely? */
        old_nxi = task_get_nx_info(p);
        if (old_nxi == nxi)
                goto out;

        task_lock(p);
        if (old_nxi)
                clr_nx_info(&p->nx_info);
        claim_nx_info(nxi, p);
        set_nx_info(&p->nx_info, nxi);
        p->nid = nxi->nx_id;
        task_unlock(p);

        vxdprintk(VXD_CBIT(nid, 5),
                "moved task %p into nxi:%p[#%d]",
                p, nxi, nxi->nx_id);

        if (old_nxi)
                release_nx_info(old_nxi, p);
        ret = 0;
out:
        put_nx_info(old_nxi);
        return ret;
}

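/*
 *      Reference accounting in nx_migrate_task(), summarized (a
 *      reading of the code above, not a formal contract): the new nxi
 *      is claimed via claim_nx_info() and installed with
 *      set_nx_info(); the task's previous context, if different, is
 *      released, and the temporary task_get_nx_info() reference is
 *      dropped at "out:".  The pointer swap itself happens under
 *      task_lock(p) so p->nx_info and p->nid stay consistent.
 */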

#ifdef CONFIG_INET

#include <linux/netdevice.h>
#include <linux/inetdevice.h>

int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
{
        if (!nxi)
                return 1;
        if (!ifa)
                return 0;
        return addr_in_nx_info(nxi, ifa->ifa_local);
}

int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
{
        struct in_device *in_dev;
        struct in_ifaddr **ifap;
        struct in_ifaddr *ifa;
        int ret = 0;

        if (!nxi)
                return 1;

        if (!dev)
                goto out;
        in_dev = in_dev_get(dev);
        if (!in_dev)
                goto out;

        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
                ifap = &ifa->ifa_next) {
                if (addr_in_nx_info(nxi, ifa->ifa_local)) {
                        ret = 1;
                        break;
                }
        }
        in_dev_put(in_dev);
out:
        return ret;
}

/*
 *      check if address is covered by socket
 *
 *      sk:     the socket to check against
 *      addr:   the address in question (must be != 0)
 */
static inline int __addr_in_socket(const struct sock *sk, uint32_t addr)
{
        struct nx_info *nxi = sk->sk_nx_info;
        uint32_t saddr = inet_rcv_saddr(sk);

        vxdprintk(VXD_CBIT(net, 5),
                "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx",
                sk, VXD_QUAD(addr), nxi, VXD_QUAD(saddr), sk->sk_socket,
                (sk->sk_socket?sk->sk_socket->flags:0));

        if (saddr) {
                /* direct address match */
                return (saddr == addr);
        } else if (nxi) {
                /* match against nx_info */
                return addr_in_nx_info(nxi, addr);
        } else {
                /* unrestricted any socket */
                return 1;
        }
}


int nx_addr_conflict(struct nx_info *nxi, uint32_t addr, const struct sock *sk)
{
        vxdprintk(VXD_CBIT(net, 2),
                "nx_addr_conflict(%p,%p) %d.%d.%d.%d",
                nxi, sk, VXD_QUAD(addr));

        if (addr) {
                /* check real address */
                return __addr_in_socket(sk, addr);
        } else if (nxi) {
                /* check against nx_info */
                int i, n = nxi->nbipv4;

                for (i=0; i<n; i++)
                        if (__addr_in_socket(sk, nxi->ipv4[i]))
                                return 1;
                return 0;
        } else {
                /* check against any */
                return 1;
        }
}

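/*
 *      Taken together: nx_addr_conflict() answers "could a socket
 *      bound as sk collide with addr (or, when addr is 0, with any of
 *      nxi's IPv4 addresses)?".  With neither an address nor an
 *      nx_info to narrow things down, a conflict is conservatively
 *      assumed.  (Summary of the two functions above, not a spec.)
 */
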
#endif /* CONFIG_INET */

void nx_set_persistent(struct nx_info *nxi)
{
        vxdprintk(VXD_CBIT(nid, 6),
                "nx_set_persistent(%p[#%d])", nxi, nxi->nx_id);

        get_nx_info(nxi);
        claim_nx_info(nxi, NULL);
}

void nx_clear_persistent(struct nx_info *nxi)
{
        vxdprintk(VXD_CBIT(nid, 6),
                "nx_clear_persistent(%p[#%d])", nxi, nxi->nx_id);

        release_nx_info(nxi, NULL);
        put_nx_info(nxi);
}

void nx_update_persistent(struct nx_info *nxi)
{
        if (nx_info_flags(nxi, NXF_PERSISTENT, 0))
                nx_set_persistent(nxi);
        else
                nx_clear_persistent(nxi);
}

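/*
 *      Persistence, in short: a context with NXF_PERSISTENT set holds
 *      one extra reference (get_nx_info) and one extra claim
 *      (claim_nx_info), so it survives with no tasks attached;
 *      clearing the flag drops both again.  nx_update_persistent()
 *      re-synchronizes that state after a flag change (see
 *      vc_set_nflags() below).
 */
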
/* vserver syscall commands below here */

/* task nid and nx_info functions */

#include <asm/uaccess.h>


int vc_task_nid(uint32_t id, void __user *data)
{
        nid_t nid;

        if (id) {
                struct task_struct *tsk;

                if (!nx_check(0, VS_ADMIN|VS_WATCH))
                        return -EPERM;

                read_lock(&tasklist_lock);
                tsk = find_task_by_real_pid(id);
                nid = (tsk) ? tsk->nid : -ESRCH;
                read_unlock(&tasklist_lock);
        }
        else
                nid = nx_current_nid();
        return nid;
}


int vc_nx_info(struct nx_info *nxi, void __user *data)
{
        struct vcmd_nx_info_v0 vc_data;

        vc_data.nid = nxi->nx_id;

        if (copy_to_user (data, &vc_data, sizeof(vc_data)))
                return -EFAULT;
        return 0;
}


/* network functions */

int vc_net_create(uint32_t nid, void __user *data)
{
        struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET };
        struct nx_info *new_nxi;
        int ret;

        if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        if ((nid > MAX_S_CONTEXT) && (nid != NX_DYNAMIC_ID))
                return -EINVAL;
        if (nid < 2)
                return -EINVAL;

        new_nxi = __create_nx_info(nid);
        if (IS_ERR(new_nxi))
                return PTR_ERR(new_nxi);

        /* initial flags */
        new_nxi->nx_flags = vc_data.flagword;

        ret = -ENOEXEC;
        if (vs_net_change(new_nxi, VSC_NETUP))
                goto out;

        ret = nx_migrate_task(current, new_nxi);
        if (ret)
                goto out;

        /* return context id on success */
        ret = new_nxi->nx_id;

        /* get a reference for persistent contexts */
        if ((vc_data.flagword & NXF_PERSISTENT))
                nx_set_persistent(new_nxi);
out:
        release_nx_info(new_nxi, NULL);
        put_nx_info(new_nxi);
        return ret;
}

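/*
 *      vc_net_create() in one line: validate the requested nid, create
 *      the context, copy the initial flag word, signal VSC_NETUP,
 *      migrate the calling task into the new context, and finally drop
 *      the creation claim/reference taken by __create_nx_info();
 *      persistent contexts keep an extra claim/reference via
 *      nx_set_persistent().  On success the new nid is returned to
 *      userspace.
 */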

int vc_net_migrate(struct nx_info *nxi, void __user *data)
{
        return nx_migrate_task(current, nxi);
}

int vc_net_add(struct nx_info *nxi, void __user *data)
{
        struct vcmd_net_addr_v0 vc_data;
        int index, pos, ret = 0;

        if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        switch (vc_data.type) {
        case NXA_TYPE_IPV4:
                if ((vc_data.count < 1) || (vc_data.count > 4))
                        return -EINVAL;
                break;

        default:
                break;
        }

        switch (vc_data.type) {
        case NXA_TYPE_IPV4:
                index = 0;
                while ((index < vc_data.count) &&
                        ((pos = nxi->nbipv4) < NB_IPV4ROOT)) {
                        nxi->ipv4[pos] = vc_data.ip[index];
                        nxi->mask[pos] = vc_data.mask[index];
                        index++;
                        nxi->nbipv4++;
                }
                ret = index;
                break;

        case NXA_TYPE_IPV4|NXA_MOD_BCAST:
                nxi->v4_bcast = vc_data.ip[0];
                ret = 1;
                break;

        default:
                ret = -EINVAL;
                break;
        }
        return ret;
}

int vc_net_remove(struct nx_info * nxi, void __user *data)
{
        struct vcmd_net_addr_v0 vc_data;

        if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        switch (vc_data.type) {
        case NXA_TYPE_ANY:
                nxi->nbipv4 = 0;
                break;

        default:
                return -EINVAL;
        }
        return 0;
}

int vc_get_nflags(struct nx_info *nxi, void __user *data)
{
        struct vcmd_net_flags_v0 vc_data;

        vc_data.flagword = nxi->nx_flags;

        /* special STATE flag handling */
        vc_data.mask = vs_mask_flags(~0UL, nxi->nx_flags, NXF_ONE_TIME);

        if (copy_to_user (data, &vc_data, sizeof(vc_data)))
                return -EFAULT;
        return 0;
}

int vc_set_nflags(struct nx_info *nxi, void __user *data)
{
        struct vcmd_net_flags_v0 vc_data;
        uint64_t mask, trigger;

        if (copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        /* special STATE flag handling */
        mask = vs_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME);
        trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);

        nxi->nx_flags = vs_mask_flags(nxi->nx_flags,
                vc_data.flagword, mask);
        if (trigger & NXF_PERSISTENT)
                nx_update_persistent(nxi);

        return 0;
}

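/*
 *      How the flag update above works (a reading of the code, with
 *      vs_mask_mask()/vs_mask_flags() living in the vserver headers):
 *      the caller-supplied mask is first narrowed by vs_mask_mask(),
 *      which gives the NXF_ONE_TIME bits their special treatment;
 *      "trigger" records which masked bits actually flip; and
 *      vs_mask_flags() then merges the new values in.  Only when
 *      NXF_PERSISTENT itself flips is the persistence reference
 *      re-synchronized via nx_update_persistent().
 */
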
int vc_get_ncaps(struct nx_info *nxi, void __user *data)
{
        struct vcmd_net_caps_v0 vc_data;

        vc_data.ncaps = nxi->nx_ncaps;
        vc_data.cmask = ~0UL;

        if (copy_to_user (data, &vc_data, sizeof(vc_data)))
                return -EFAULT;
        return 0;
}

int vc_set_ncaps(struct nx_info *nxi, void __user *data)
{
        struct vcmd_net_caps_v0 vc_data;

        if (copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        nxi->nx_ncaps = vs_mask_flags(nxi->nx_ncaps,
                vc_data.ncaps, vc_data.cmask);
        return 0;
}


#include <linux/module.h>

EXPORT_SYMBOL_GPL(free_nx_info);
EXPORT_SYMBOL_GPL(unhash_nx_info);