/* vserver 2.0 rc7 -- [linux-2.6.git] / kernel / vserver / network.c */
1 /*
2  *  linux/kernel/vserver/network.c
3  *
4  *  Virtual Server: Network Support
5  *
6  *  Copyright (C) 2003-2005  Herbert Pötzl
7  *
8  *  V0.01  broken out from vcontext V0.05
9  *  V0.02  cleaned up implementation
10  *  V0.03  added equiv nx commands
11  *  V0.04  switch to RCU based hash
12  *  V0.05  and back to locking again
13  *
14  */
15
16 #include <linux/config.h>
17 #include <linux/slab.h>
18 #include <linux/vserver/network_cmd.h>
19 #include <linux/rcupdate.h>
20 #include <net/tcp.h>
21
22 #include <asm/errno.h>
23
24
25 /*      __alloc_nx_info()
26
27         * allocate an initialized nx_info struct
28         * doesn't make it visible (hash)                        */
29
30 static struct nx_info *__alloc_nx_info(nid_t nid)
31 {
32         struct nx_info *new = NULL;
33
34         vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);
35
36         /* would this benefit from a slab cache? */
37         new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
38         if (!new)
39                 return 0;
40
41         memset (new, 0, sizeof(struct nx_info));
42         new->nx_id = nid;
43         INIT_HLIST_NODE(&new->nx_hlist);
44         atomic_set(&new->nx_usecnt, 0);
45         atomic_set(&new->nx_tasks, 0);
46         new->nx_state = 0;
47
48         new->nx_flags = NXF_INIT_SET;
49
50         /* rest of init goes here */
51
52         vxdprintk(VXD_CBIT(nid, 0),
53                 "alloc_nx_info(%d) = %p", nid, new);
54         return new;
55 }
56
/*	__dealloc_nx_info()

	* final disposal of nx_info				*/

static void __dealloc_nx_info(struct nx_info *nxi)
{
	/* NOTE(review): 'nid' here is presumably a vserver debug
	   category token pasted by the VXD_CBIT macro, not a local
	   variable -- confirm against the vserver debug headers */
	vxdprintk(VXD_CBIT(nid, 0),
		"dealloc_nx_info(%p)", nxi);

	/* poison the hash linkage and id so a stale use of this
	   struct after free is caught loudly */
	nxi->nx_hlist.next = LIST_POISON1;
	nxi->nx_id = -1;

	/* must not be freed while referenced or used by any task */
	BUG_ON(atomic_read(&nxi->nx_usecnt));
	BUG_ON(atomic_read(&nxi->nx_tasks));

	/* mark released just before the memory goes away */
	nxi->nx_state |= NXS_RELEASED;
	kfree(nxi);
}
75
/* mark the context as shut down and notify interested parties;
   the NXS_SHUTDOWN flag is set before the notification fires */
static void __shutdown_nx_info(struct nx_info *nxi)
{
	nxi->nx_state |= NXS_SHUTDOWN;
	vs_net_change(nxi, VSC_NETDOWN);
}
81
82 /*      exported stuff                                          */
83
/* final disposal entry point: verify all lifecycle invariants
   hold, then hand the struct to __dealloc_nx_info() */
void free_nx_info(struct nx_info *nxi)
{
	/* context shutdown is mandatory */
	BUG_ON(nxi->nx_state != NXS_SHUTDOWN);

	/* context must not be hashed */
	/* NOTE(review): redundant -- the exact-equality check above
	   already guarantees no other state bit (incl. NXS_HASHED)
	   is set; kept as cheap belt-and-braces documentation */
	BUG_ON(nxi->nx_state & NXS_HASHED);

	BUG_ON(atomic_read(&nxi->nx_usecnt));
	BUG_ON(atomic_read(&nxi->nx_tasks));

	__dealloc_nx_info(nxi);
}
97
98
99 /*      hash table for nx_info hash */
100
101 #define NX_HASH_SIZE    13
102
103 struct hlist_head nx_info_hash[NX_HASH_SIZE];
104
105 static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED;
106
107
108 static inline unsigned int __hashval(nid_t nid)
109 {
110         return (nid % NX_HASH_SIZE);
111 }
112
113
114
/*	__hash_nx_info()

	* add the nxi to the global hash table
	* requires the hash_lock to be held			*/

static inline void __hash_nx_info(struct nx_info *nxi)
{
	struct hlist_head *head;

	vxd_assert_lock(&nx_info_hash_lock);
	vxdprintk(VXD_CBIT(nid, 4),
		"__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);

	/* context must not be hashed */
	BUG_ON(nx_info_state(nxi, NXS_HASHED));

	/* set the state bit first, then link in; both happen under
	   the hash lock so lookups never see a half-hashed entry.
	   the caller is expected to pass a get()'ed nxi -- this
	   function takes no reference itself */
	nxi->nx_state |= NXS_HASHED;
	head = &nx_info_hash[__hashval(nxi->nx_id)];
	hlist_add_head(&nxi->nx_hlist, head);
}
135
/*	__unhash_nx_info()

	* remove the nxi from the global hash table
	* requires the hash_lock to be held			*/

static inline void __unhash_nx_info(struct nx_info *nxi)
{
	vxd_assert_lock(&nx_info_hash_lock);
	vxdprintk(VXD_CBIT(nid, 4),
		"__unhash_nx_info: %p[#%d]", nxi, nxi->nx_id);

	/* context must be hashed */
	BUG_ON(!nx_info_state(nxi, NXS_HASHED));

	/* clear the state bit, then unlink -- mirror image of
	   __hash_nx_info(); the hash reference is dropped by the
	   caller, not here */
	nxi->nx_state &= ~NXS_HASHED;
	hlist_del(&nxi->nx_hlist);
}
153
154
155 /*      __lookup_nx_info()
156
157         * requires the hash_lock to be held
158         * doesn't increment the nx_refcnt                       */
159
160 static inline struct nx_info *__lookup_nx_info(nid_t nid)
161 {
162         struct hlist_head *head = &nx_info_hash[__hashval(nid)];
163         struct hlist_node *pos;
164         struct nx_info *nxi;
165
166         vxd_assert_lock(&nx_info_hash_lock);
167         hlist_for_each(pos, head) {
168                 nxi = hlist_entry(pos, struct nx_info, nx_hlist);
169
170                 if (nxi->nx_id == nid)
171                         goto found;
172         }
173         nxi = NULL;
174 found:
175         vxdprintk(VXD_CBIT(nid, 0),
176                 "__lookup_nx_info(#%u): %p[#%u]",
177                 nid, nxi, nxi?nxi->nx_id:0);
178         return nxi;
179 }
180
181
/*	__nx_dynamic_id()

	* find unused dynamic nid
	* requires the hash_lock to be held			*/

static inline nid_t __nx_dynamic_id(void)
{
	/* persists across calls: next-fit allocation so successive
	   dynamic ids rotate through the range instead of always
	   reusing the lowest free one. protected by the hash lock */
	static nid_t seq = MAX_N_CONTEXT;
	nid_t barrier = seq;

	vxd_assert_lock(&nx_info_hash_lock);
	/* scan (MIN_D_CONTEXT..MAX_N_CONTEXT], wrapping once; the
	   barrier detects a full cycle with no free id */
	do {
		if (++seq > MAX_N_CONTEXT)
			seq = MIN_D_CONTEXT;
		if (!__lookup_nx_info(seq)) {
			vxdprintk(VXD_CBIT(nid, 4),
				"__nx_dynamic_id: [#%d]", seq);
			return seq;
		}
	} while (barrier != seq);
	/* 0 == no dynamic id available */
	return 0;
}
204
/*	__create_nx_info()

	* create the requested context
	* get() and hash it				*/

static struct nx_info * __create_nx_info(int id)
{
	/* 'new' owns the fresh allocation until it is consumed by
	   the hash; 'nxi' is the value returned to the caller
	   (a hashed+get()'ed context or an ERR_PTR) */
	struct nx_info *new, *nxi = NULL;

	vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id);

	if (!(new = __alloc_nx_info(id)))
		return ERR_PTR(-ENOMEM);

	/* required to make dynamic xids unique */
	spin_lock(&nx_info_hash_lock);

	/* dynamic context requested */
	if (id == NX_DYNAMIC_ID) {
		id = __nx_dynamic_id();
		if (!id) {
			printk(KERN_ERR "no dynamic context available.\n");
			nxi = ERR_PTR(-EAGAIN);
			goto out_unlock;
		}
		/* the struct was allocated with the sentinel id;
		   patch in the real one before hashing */
		new->nx_id = id;
	}
	/* static context requested */
	else if ((nxi = __lookup_nx_info(id))) {
		vxdprintk(VXD_CBIT(nid, 0),
			"create_nx_info(%d) = %p (already there)", id, nxi);
		/* distinguish "still being set up" from "exists" */
		if (nx_info_flags(nxi, NXF_STATE_SETUP, 0))
			nxi = ERR_PTR(-EBUSY);
		else
			nxi = ERR_PTR(-EEXIST);
		goto out_unlock;
	}
	/* dynamic nid creation blocker */
	else if (id >= MIN_D_CONTEXT) {
		vxdprintk(VXD_CBIT(nid, 0),
			"create_nx_info(%d) (dynamic rejected)", id);
		nxi = ERR_PTR(-EINVAL);
		goto out_unlock;
	}

	/* new context */
	vxdprintk(VXD_CBIT(nid, 0),
		"create_nx_info(%d) = %p (new)", id, new);
	/* the hash holds one reference (the get_nx_info here) */
	__hash_nx_info(get_nx_info(new));
	/* hand ownership to the caller; NULLing 'new' prevents the
	   dealloc below from freeing the now-live context */
	nxi = new, new = NULL;

out_unlock:
	spin_unlock(&nx_info_hash_lock);
	/* any error path leaves 'new' set -> dispose of it */
	if (new)
		__dealloc_nx_info(new);
	return nxi;
}
262
263
264
265 /*      exported stuff                                          */
266
267
/* shut the context down and drop it from the global hash;
   shutdown is done first (outside the lock) so the NXS_SHUTDOWN
   state is visible before the context becomes unreachable */
void unhash_nx_info(struct nx_info *nxi)
{
	__shutdown_nx_info(nxi);
	spin_lock(&nx_info_hash_lock);
	__unhash_nx_info(nxi);
	spin_unlock(&nx_info_hash_lock);
}
275
276 #ifdef  CONFIG_VSERVER_LEGACYNET
277
/* legacy interface: create a context with a dynamically
   allocated nid (returns ERR_PTR on failure) */
struct nx_info *create_nx_info(void)
{
	return __create_nx_info(NX_DYNAMIC_ID);
}
282
283 #endif
284
285 /*      locate_nx_info()
286
287         * search for a nx_info and get() it
288         * negative id means current                             */
289
290 struct nx_info *locate_nx_info(int id)
291 {
292         struct nx_info *nxi = NULL;
293
294         if (id < 0) {
295                 nxi = get_nx_info(current->nx_info);
296         } else if (id > 1) {
297                 spin_lock(&nx_info_hash_lock);
298                 nxi = get_nx_info(__lookup_nx_info(id));
299                 spin_unlock(&nx_info_hash_lock);
300         }
301         return nxi;
302 }
303
304 /*      nid_is_hashed()
305
306         * verify that nid is still hashed                       */
307
308 int nid_is_hashed(nid_t nid)
309 {
310         int hashed;
311
312         spin_lock(&nx_info_hash_lock);
313         hashed = (__lookup_nx_info(nid) != NULL);
314         spin_unlock(&nx_info_hash_lock);
315         return hashed;
316 }
317
318
319 #ifdef  CONFIG_PROC_FS
320
/* fill nids[] with up to 'size' context ids, skipping the first
   'index' - 1 entries (proc seq-file style pagination); returns
   the number of ids stored.
   NOTE(review): the lock is dropped between buckets to keep hold
   times short, so entries added/removed concurrently may be
   missed or seen twice across calls -- presumably acceptable for
   a /proc listing; confirm against the proc consumer */
int get_nid_list(int index, unsigned int *nids, int size)
{
	int hindex, nr_nids = 0;

	for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
		struct hlist_head *head = &nx_info_hash[hindex];
		struct hlist_node *pos;

		spin_lock(&nx_info_hash_lock);
		hlist_for_each(pos, head) {
			struct nx_info *nxi;

			/* pre-decrement: index <= 1 means "start
			   collecting from here" */
			if (--index > 0)
				continue;

			nxi = hlist_entry(pos, struct nx_info, nx_hlist);
			nids[nr_nids] = nxi->nx_id;
			/* output buffer full: unlock and bail */
			if (++nr_nids >= size) {
				spin_unlock(&nx_info_hash_lock);
				goto out;
			}
		}
		/* keep the lock time short */
		spin_unlock(&nx_info_hash_lock);
	}
out:
	return nr_nids;
}
349 #endif
350
351
/*
 *	migrate task to new network
 *	gets nxi, puts old_nxi on change
 */

int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
{
	struct nx_info *old_nxi;
	int ret = 0;

	if (!p || !nxi)
		BUG();

	vxdprintk(VXD_CBIT(nid, 5),
		"nx_migrate_task(%p,%p[#%d.%d.%d])",
		p, nxi, nxi->nx_id,
		atomic_read(&nxi->nx_usecnt),
		atomic_read(&nxi->nx_tasks));

	/* maybe disallow this completely? */
	old_nxi = task_get_nx_info(p);
	/* no-op if the task is already in the target context */
	if (old_nxi == nxi)
		goto out;

	/* swap the task's context pointer under task_lock so no
	   one observes a half-migrated task; claim/set take the
	   references on the new context, clr drops the task's
	   reference on the old one */
	task_lock(p);
	if (old_nxi)
		clr_nx_info(&p->nx_info);
	claim_nx_info(nxi, p);
	set_nx_info(&p->nx_info, nxi);
	p->nid = nxi->nx_id;
	task_unlock(p);

	vxdprintk(VXD_CBIT(nid, 5),
		"moved task %p into nxi:%p[#%d]",
		p, nxi, nxi->nx_id);

	/* release must happen outside the task lock */
	if (old_nxi)
		release_nx_info(old_nxi, p);
out:
	/* drop the temporary reference from task_get_nx_info() */
	put_nx_info(old_nxi);
	return ret;
}
394
395
396 #include <linux/netdevice.h>
397 #include <linux/inetdevice.h>
398
399
400 int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
401 {
402         if (!nxi)
403                 return 1;
404         if (!ifa)
405                 return 0;
406         return addr_in_nx_info(nxi, ifa->ifa_address);
407 }
408
409 int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
410 {
411         struct in_device *in_dev = __in_dev_get(dev);
412         struct in_ifaddr **ifap = NULL;
413         struct in_ifaddr *ifa = NULL;
414
415         if (!nxi)
416                 return 1;
417         if (!in_dev)
418                 return 0;
419
420         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
421                 ifap = &ifa->ifa_next) {
422                 if (addr_in_nx_info(nxi, ifa->ifa_address))
423                         return 1;
424         }
425         return 0;
426 }
427
428 /*
429  *      check if address is covered by socket
430  *
431  *      sk:     the socket to check against
432  *      addr:   the address in question (must be != 0)
433  */
434 static inline int __addr_in_socket(struct sock *sk, uint32_t addr)
435 {
436         struct nx_info *nxi = sk->sk_nx_info;
437         uint32_t saddr = tcp_v4_rcv_saddr(sk);
438
439         vxdprintk(VXD_CBIT(net, 5),
440                 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx",
441                 sk, VXD_QUAD(addr), nxi, VXD_QUAD(saddr), sk->sk_socket,
442                 (sk->sk_socket?sk->sk_socket->flags:0));
443
444         if (saddr) {
445                 /* direct address match */
446                 return (saddr == addr);
447         } else if (nxi) {
448                 /* match against nx_info */
449                 return addr_in_nx_info(nxi, addr);
450         } else {
451                 /* unrestricted any socket */
452                 return 1;
453         }
454 }
455
456
457 int nx_addr_conflict(struct nx_info *nxi, uint32_t addr, struct sock *sk)
458 {
459         vxdprintk(VXD_CBIT(net, 2),
460                 "nx_addr_conflict(%p,%p) %d.%d,%d.%d",
461                 nxi, sk, VXD_QUAD(addr));
462
463         if (addr) {
464                 /* check real address */
465                 return __addr_in_socket(sk, addr);
466         } else if (nxi) {
467                 /* check against nx_info */
468                 int i, n = nxi->nbipv4;
469
470                 for (i=0; i<n; i++)
471                         if (__addr_in_socket(sk, nxi->ipv4[i]))
472                                 return 1;
473                 return 0;
474         } else {
475                 /* check against any */
476                 return 1;
477         }
478 }
479
480
481 /* vserver syscall commands below here */
482
83 /* task nid and nx_info functions */
484
485 #include <asm/uaccess.h>
486
487
488 int vc_task_nid(uint32_t id, void __user *data)
489 {
490         nid_t nid;
491
492         if (id) {
493                 struct task_struct *tsk;
494
495                 if (!vx_check(0, VX_ADMIN|VX_WATCH))
496                         return -EPERM;
497
498                 read_lock(&tasklist_lock);
499                 tsk = find_task_by_real_pid(id);
500                 nid = (tsk) ? tsk->nid : -ESRCH;
501                 read_unlock(&tasklist_lock);
502         }
503         else
504                 nid = current->nid;
505         return nid;
506 }
507
508
/* vserver syscall: copy basic nx_info data (currently just the
   nid) for context 'id' to userspace */
int vc_nx_info(uint32_t id, void __user *data)
{
	struct nx_info *nxi;
	struct vcmd_nx_info_v0 vc_data;

	/* NOTE(review): -ENOSYS (not -EPERM) when called from a
	   non-admin context -- presumably to hide the interface;
	   confirm against the other vc_* handlers */
	if (!vx_check(0, VX_ADMIN))
		return -ENOSYS;
	if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
		return -EPERM;

	nxi = locate_nx_info(id);
	if (!nxi)
		return -ESRCH;

	/* snapshot while holding the reference, drop it before
	   the (possibly faulting) copy to userspace */
	vc_data.nid = nxi->nx_id;
	put_nx_info(nxi);

	if (copy_to_user (data, &vc_data, sizeof(vc_data)))
		return -EFAULT;
	return 0;
}
530
531
532 /* network functions */
533
534 int vc_net_create(uint32_t nid, void __user *data)
535 {
536         struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET };
537         struct nx_info *new_nxi;
538         int ret;
539
540         if (!capable(CAP_SYS_ADMIN))
541                 return -EPERM;
542         if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
543                 return -EFAULT;
544
545         if ((nid > MAX_S_CONTEXT) && (nid != VX_DYNAMIC_ID))
546                 return -EINVAL;
547         if (nid < 2)
548                 return -EINVAL;
549
550         new_nxi = __create_nx_info(nid);
551         if (IS_ERR(new_nxi))
552                 return PTR_ERR(new_nxi);
553
554         /* initial flags */
555         new_nxi->nx_flags = vc_data.flagword;
556
557         vs_net_change(new_nxi, VSC_NETUP);
558         ret = new_nxi->nx_id;
559         nx_migrate_task(current, new_nxi);
560         /* if this fails, we might end up with a hashed nx_info */
561         put_nx_info(new_nxi);
562         return ret;
563 }
564
565
566 int vc_net_migrate(uint32_t id, void __user *data)
567 {
568         struct nx_info *nxi;
569
570         if (!capable(CAP_SYS_ADMIN))
571                 return -EPERM;
572
573         nxi = locate_nx_info(id);
574         if (!nxi)
575                 return -ESRCH;
576         nx_migrate_task(current, nxi);
577         put_nx_info(nxi);
578         return 0;
579 }
580
/* vserver syscall: add IPv4 addresses (or the broadcast address)
   to network context 'nid'.  returns the number of addresses
   actually added, or a negative errno */
int vc_net_add(uint32_t nid, void __user *data)
{
	struct vcmd_net_addr_v0 vc_data;
	struct nx_info *nxi;
	int index, pos, ret = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	/* validate the request before taking a context reference */
	switch (vc_data.type) {
	case NXA_TYPE_IPV4:
		/* the vcmd carries at most 4 address/mask pairs */
		if ((vc_data.count < 1) || (vc_data.count > 4))
			return -EINVAL;
		break;

	default:
		break;
	}

	nxi = locate_nx_info(nid);
	if (!nxi)
		return -ESRCH;

	switch (vc_data.type) {
	case NXA_TYPE_IPV4:
		/* append up to count pairs, bounded by the context's
		   NB_IPV4ROOT capacity; ret = number appended.
		   NOTE(review): nbipv4/ipv4[]/mask[] are updated
		   without a lock -- concurrent adds could race;
		   confirm serialization at the vcmd entry layer */
		index = 0;
		while ((index < vc_data.count) &&
			((pos = nxi->nbipv4) < NB_IPV4ROOT)) {
			nxi->ipv4[pos] = vc_data.ip[index];
			nxi->mask[pos] = vc_data.mask[index];
			index++;
			nxi->nbipv4++;
		}
		ret = index;
		break;

	case NXA_TYPE_IPV4|NXA_MOD_BCAST:
		/* single broadcast address, always "1 added" */
		nxi->v4_bcast = vc_data.ip[0];
		ret = 1;
		break;

	default:
		ret = -EINVAL;
		break;
	}

	put_nx_info(nxi);
	return ret;
}
632
633 int vc_net_remove(uint32_t nid, void __user *data)
634 {
635         struct vcmd_net_addr_v0 vc_data;
636         struct nx_info *nxi;
637         int ret = 0;
638
639         if (!capable(CAP_SYS_ADMIN))
640                 return -EPERM;
641         if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
642                 return -EFAULT;
643
644         nxi = locate_nx_info(nid);
645         if (!nxi)
646                 return -ESRCH;
647
648         switch (vc_data.type) {
649         case NXA_TYPE_ANY:
650                 nxi->nbipv4 = 0;
651                 break;
652
653         default:
654                 ret = -EINVAL;
655                 break;
656         }
657
658         put_nx_info(nxi);
659         return ret;
660 }
661
/* vserver syscall: copy the context's flag word and the mask of
   still-settable flags to userspace */
int vc_get_nflags(uint32_t id, void __user *data)
{
	struct nx_info *nxi;
	struct vcmd_net_flags_v0 vc_data;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	nxi = locate_nx_info(id);
	if (!nxi)
		return -ESRCH;

	vc_data.flagword = nxi->nx_flags;

	/* special STATE flag handling */
	/* presumably: one-time flags already consumed are masked
	   out of the reported settable mask -- verify against
	   vx_mask_flags() semantics */
	vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, NXF_ONE_TIME);

	/* drop the reference before the possibly-faulting copy */
	put_nx_info(nxi);

	if (copy_to_user (data, &vc_data, sizeof(vc_data)))
		return -EFAULT;
	return 0;
}
685
686 int vc_set_nflags(uint32_t id, void __user *data)
687 {
688         struct nx_info *nxi;
689         struct vcmd_net_flags_v0 vc_data;
690         uint64_t mask, trigger;
691
692         if (!capable(CAP_SYS_ADMIN))
693                 return -EPERM;
694         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
695                 return -EFAULT;
696
697         nxi = locate_nx_info(id);
698         if (!nxi)
699                 return -ESRCH;
700
701         /* special STATE flag handling */
702         mask = vx_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME);
703         trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);
704
705         nxi->nx_flags = vx_mask_flags(nxi->nx_flags,
706                 vc_data.flagword, mask);
707         put_nx_info(nxi);
708         return 0;
709 }
710
/* vserver syscall: copy the context's network capability set to
   userspace; cmask = ~0 advertises that all bits are queryable */
int vc_get_ncaps(uint32_t id, void __user *data)
{
	struct nx_info *nxi;
	struct vcmd_net_caps_v0 vc_data;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	nxi = locate_nx_info(id);
	if (!nxi)
		return -ESRCH;

	vc_data.ncaps = nxi->nx_ncaps;
	vc_data.cmask = ~0UL;
	/* drop the reference before the possibly-faulting copy */
	put_nx_info(nxi);

	if (copy_to_user (data, &vc_data, sizeof(vc_data)))
		return -EFAULT;
	return 0;
}
731
/* vserver syscall: update the context's network capability set;
   only the bits selected by cmask are changed */
int vc_set_ncaps(uint32_t id, void __user *data)
{
	struct nx_info *nxi;
	struct vcmd_net_caps_v0 vc_data;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	nxi = locate_nx_info(id);
	if (!nxi)
		return -ESRCH;

	/* merge: keep bits outside cmask, take ncaps inside it */
	nxi->nx_ncaps = vx_mask_flags(nxi->nx_ncaps,
		vc_data.ncaps, vc_data.cmask);
	put_nx_info(nxi);
	return 0;
}
751
752
753 #include <linux/module.h>
754
755 EXPORT_SYMBOL_GPL(free_nx_info);
756 EXPORT_SYMBOL_GPL(unhash_nx_info);
757