4c3eb435a52135f4b88b62be9a63f9dfe9fc980a
[linux-2.6.git] / kernel / vserver / network.c
1 /*
2  *  linux/kernel/vserver/network.c
3  *
4  *  Virtual Server: Network Support
5  *
6  *  Copyright (C) 2003-2005  Herbert Pötzl
7  *
8  *  V0.01  broken out from vcontext V0.05
9  *  V0.02  cleaned up implementation
10  *  V0.03  added equiv nx commands
11  *  V0.04  switch to RCU based hash
12  *
13  */
14
15 #include <linux/config.h>
16 #include <linux/slab.h>
17 #include <linux/vserver/network_cmd.h>
18 #include <linux/rcupdate.h>
19 #include <net/tcp.h>
20
21 #include <asm/errno.h>
22
23
24 /*      __alloc_nx_info()
25
26         * allocate an initialized nx_info struct
27         * doesn't make it visible (hash)                        */
28
29 static struct nx_info *__alloc_nx_info(nid_t nid)
30 {
31         struct nx_info *new = NULL;
32
33         vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);
34
35         /* would this benefit from a slab cache? */
36         new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
37         if (!new)
38                 return 0;
39
40         memset (new, 0, sizeof(struct nx_info));
41         new->nx_id = nid;
42         INIT_RCU_HEAD(&new->nx_rcu);
43         INIT_HLIST_NODE(&new->nx_hlist);
44         atomic_set(&new->nx_refcnt, 0);
45         atomic_set(&new->nx_usecnt, 0);
46
47         /* rest of init goes here */
48
49         vxdprintk(VXD_CBIT(nid, 0),
50                 "alloc_nx_info() = %p", new);
51         return new;
52 }
53
54 /*      __dealloc_nx_info()
55
56         * final disposal of nx_info                             */
57
58 static void __dealloc_nx_info(struct nx_info *nxi)
59 {
60         vxdprintk(VXD_CBIT(nid, 0),
61                 "dealloc_nx_info(%p)", nxi);
62
63         nxi->nx_hlist.next = LIST_POISON1;
64         nxi->nx_id = -1;
65
66         BUG_ON(atomic_read(&nxi->nx_usecnt));
67         BUG_ON(atomic_read(&nxi->nx_refcnt));
68
69         kfree(nxi);
70 }
71
72 static inline int __free_nx_info(struct nx_info *nxi)
73 {
74         int usecnt, refcnt;
75
76         BUG_ON(!nxi);
77
78         usecnt = atomic_read(&nxi->nx_usecnt);
79         BUG_ON(usecnt < 0);
80
81         refcnt = atomic_read(&nxi->nx_refcnt);
82         BUG_ON(refcnt < 0);
83
84         if (!usecnt)
85                 __dealloc_nx_info(nxi);
86         return usecnt;
87 }
88
89 /*      exported stuff                                          */
90
/*      free_nx_info()

        * dispose of an nx_info
        * BUGs if __free_nx_info() reports remaining users      */

void free_nx_info(struct nx_info *nxi)
{
        int busy;

        /* context shutdown is mandatory */
        // BUG_ON(nxi->nx_state != NXS_SHUTDOWN);

        // BUG_ON(nxi->nx_state & NXS_HASHED);

        busy = __free_nx_info(nxi);
        BUG_ON(busy);
}
100
101
102 /*      hash table for nx_info hash */
103
104 #define NX_HASH_SIZE    13
105
106 struct hlist_head nx_info_hash[NX_HASH_SIZE];
107
108 static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED;
109
110
111 static inline unsigned int __hashval(nid_t nid)
112 {
113         return (nid % NX_HASH_SIZE);
114 }
115
116
117
118 /*      __hash_nx_info()
119
120         * add the nxi to the global hash table
121         * requires the hash_lock to be held                     */
122
123 static inline void __hash_nx_info(struct nx_info *nxi)
124 {
125         struct hlist_head *head;
126
127         vxdprintk(VXD_CBIT(nid, 4),
128                 "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);
129         get_nx_info(nxi);
130         head = &nx_info_hash[__hashval(nxi->nx_id)];
131         hlist_add_head(&nxi->nx_hlist, head);
132 }
133
134 /*      __unhash_nx_info()
135
136         * remove the nxi from the global hash table
137         * requires the hash_lock to be held                     */
138
139 static inline void __unhash_nx_info(struct nx_info *nxi)
140 {
141         vxd_assert_lock(&nx_info_hash_lock);
142         vxdprintk(VXD_CBIT(nid, 4),
143                 "__unhash_nx_info: %p[#%d]", nxi, nxi->nx_id);
144         hlist_del(&nxi->nx_hlist);
145         put_nx_info(nxi);
146 }
147
148
149 /*      __lookup_nx_info()
150
151         * requires the hash_lock to be held
152         * doesn't increment the nx_refcnt                       */
153
154 static inline struct nx_info *__lookup_nx_info(nid_t nid)
155 {
156         struct hlist_head *head = &nx_info_hash[__hashval(nid)];
157         struct hlist_node *pos;
158
159         vxd_assert_lock(&nx_info_hash_lock);
160         hlist_for_each(pos, head) {
161                 struct nx_info *nxi =
162                         hlist_entry(pos, struct nx_info, nx_hlist);
163
164                 if (nxi->nx_id == nid) {
165                         return nxi;
166                 }
167         }
168         return NULL;
169 }
170
171
172 /*      __nx_dynamic_id()
173
174         * find unused dynamic nid
175         * requires the hash_lock to be held                     */
176
177 static inline nid_t __nx_dynamic_id(void)
178 {
179         static nid_t seq = MAX_N_CONTEXT;
180         nid_t barrier = seq;
181
182         vxd_assert_lock(&nx_info_hash_lock);
183         do {
184                 if (++seq > MAX_N_CONTEXT)
185                         seq = MIN_D_CONTEXT;
186                 if (!__lookup_nx_info(seq)) {
187                         vxdprintk(VXD_CBIT(nid, 4),
188                                 "__nx_dynamic_id: [#%d]", seq);
189                         return seq;
190                 }
191         } while (barrier != seq);
192         return 0;
193 }
194
195 /*      __loc_nx_info()
196
197         * locate or create the requested context
198         * get() it and if new hash it                           */
199
200 static struct nx_info * __loc_nx_info(int id, int *err)
201 {
202         struct nx_info *new, *nxi = NULL;
203
204         vxdprintk(VXD_CBIT(nid, 1), "loc_nx_info(%d)*", id);
205
206         if (!(new = __alloc_nx_info(id))) {
207                 *err = -ENOMEM;
208                 return NULL;
209         }
210
211         /* required to make dynamic xids unique */
212         spin_lock(&nx_info_hash_lock);
213
214         /* dynamic context requested */
215         if (id == NX_DYNAMIC_ID) {
216                 id = __nx_dynamic_id();
217                 if (!id) {
218                         printk(KERN_ERR "no dynamic context available.\n");
219                         goto out_unlock;
220                 }
221                 new->nx_id = id;
222         }
223         /* existing context requested */
224         else if ((nxi = __lookup_nx_info(id))) {
225                 /* context in setup is not available */
226                 if (nxi->nx_flags & VXF_STATE_SETUP) {
227                         vxdprintk(VXD_CBIT(nid, 0),
228                                 "loc_nx_info(%d) = %p (not available)", id, nxi);
229                         nxi = NULL;
230                         *err = -EBUSY;
231                 } else {
232                         vxdprintk(VXD_CBIT(nid, 0),
233                                 "loc_nx_info(%d) = %p (found)", id, nxi);
234                         get_nx_info(nxi);
235                         *err = 0;
236                 }
237                 goto out_unlock;
238         }
239
240         /* new context requested */
241         vxdprintk(VXD_CBIT(nid, 0),
242                 "loc_nx_info(%d) = %p (new)", id, new);
243         __hash_nx_info(get_nx_info(new));
244         nxi = new, new = NULL;
245         *err = 1;
246
247 out_unlock:
248         spin_unlock(&nx_info_hash_lock);
249         if (new)
250                 __dealloc_nx_info(new);
251         return nxi;
252 }
253
254
255
256 /*      exported stuff                                          */
257
258
259 void unhash_nx_info(struct nx_info *nxi)
260 {
261         spin_lock(&nx_info_hash_lock);
262         __unhash_nx_info(nxi);
263         spin_unlock(&nx_info_hash_lock);
264 }
265
266 /*      locate_nx_info()
267
268         * search for a nx_info and get() it
269         * negative id means current                             */
270
271 struct nx_info *locate_nx_info(int id)
272 {
273         struct nx_info *nxi;
274
275         if (id < 0) {
276                 nxi = get_nx_info(current->nx_info);
277         } else {
278                 spin_lock(&nx_info_hash_lock);
279                 nxi = get_nx_info(__lookup_nx_info(id));
280                 spin_unlock(&nx_info_hash_lock);
281         }
282         return nxi;
283 }
284
285 /*      nid_is_hashed()
286
287         * verify that nid is still hashed                       */
288
289 int nid_is_hashed(nid_t nid)
290 {
291         int hashed;
292
293         spin_lock(&nx_info_hash_lock);
294         hashed = (__lookup_nx_info(nid) != NULL);
295         spin_unlock(&nx_info_hash_lock);
296         return hashed;
297 }
298
299 #ifdef  CONFIG_VSERVER_LEGACYNET
300
/* legacy entry point: same as __loc_nx_info(), the status code is dropped */
struct nx_info *locate_or_create_nx_info(int id)
{
        int status;
        struct nx_info *nxi;

        nxi = __loc_nx_info(id, &status);
        return nxi;
}
307
308 struct nx_info *create_nx_info(void)
309 {
310         struct nx_info *new;
311         int err;
312
313         vxdprintk(VXD_CBIT(nid, 5), "create_nx_info(%s)", "void");
314         if (!(new = __loc_nx_info(NX_DYNAMIC_ID, &err)))
315                 return NULL;
316         return new;
317 }
318
319
320 #endif
321
322 #ifdef  CONFIG_PROC_FS
323
324 int get_nid_list(int index, unsigned int *nids, int size)
325 {
326         int hindex, nr_nids = 0;
327
328         for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
329                 struct hlist_head *head = &nx_info_hash[hindex];
330                 struct hlist_node *pos;
331
332                 spin_lock(&nx_info_hash_lock);
333                 hlist_for_each(pos, head) {
334                         struct nx_info *nxi;
335
336                         if (--index > 0)
337                                 continue;
338
339                         nxi = hlist_entry(pos, struct nx_info, nx_hlist);
340                         nids[nr_nids] = nxi->nx_id;
341                         if (++nr_nids >= size) {
342                                 spin_unlock(&nx_info_hash_lock);
343                                 goto out;
344                         }
345                 }
346                 /* keep the lock time short */
347                 spin_unlock(&nx_info_hash_lock);
348         }
349 out:
350         return nr_nids;
351 }
352 #endif
353
354
355 /*
356  *      migrate task to new network
357  */
358
359 int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
360 {
361         struct nx_info *old_nxi;
362         int ret = 0;
363
364         if (!p || !nxi)
365                 BUG();
366
367         vxdprintk(VXD_CBIT(nid, 5),
368                 "nx_migrate_task(%p,%p[#%d.%d.%d])",
369                 p, nxi, nxi->nx_id,
370                 atomic_read(&nxi->nx_usecnt),
371                 atomic_read(&nxi->nx_refcnt));
372
373         old_nxi = task_get_nx_info(p);
374         if (old_nxi == nxi)
375                 goto out;
376
377         task_lock(p);
378         /* should be handled in set_nx_info !! */
379         if (old_nxi)
380                 clr_nx_info(&p->nx_info);
381         set_nx_info(&p->nx_info, nxi);
382         p->nid = nxi->nx_id;
383         task_unlock(p);
384
385         /* obsoleted by clr/set */
386         // put_nx_info(old_nxi);
387 out:
388         put_nx_info(old_nxi);
389         return ret;
390 }
391
392
393 #include <linux/netdevice.h>
394 #include <linux/inetdevice.h>
395
396
397 int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
398 {
399         if (!nxi)
400                 return 1;
401         if (!ifa)
402                 return 0;
403         return addr_in_nx_info(nxi, ifa->ifa_address);
404 }
405
406 int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
407 {
408         struct in_device *in_dev = __in_dev_get(dev);
409         struct in_ifaddr **ifap = NULL;
410         struct in_ifaddr *ifa = NULL;
411
412         if (!nxi)
413                 return 1;
414         if (!in_dev)
415                 return 0;
416
417         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
418                 ifap = &ifa->ifa_next) {
419                 if (addr_in_nx_info(nxi, ifa->ifa_address))
420                         return 1;
421         }
422         return 0;
423 }
424
425 /*
426  *      check if address is covered by socket
427  *
428  *      sk:     the socket to check against
429  *      addr:   the address in question (must be != 0)
430  */
431 static inline int __addr_in_socket(struct sock *sk, uint32_t addr)
432 {
433         struct nx_info *nxi = sk->sk_nx_info;
434         uint32_t saddr = tcp_v4_rcv_saddr(sk);
435
436         vxdprintk(VXD_CBIT(net, 5),
437                 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx",
438                 sk, VXD_QUAD(addr), nxi, VXD_QUAD(saddr), sk->sk_socket,
439                 (sk->sk_socket?sk->sk_socket->flags:0));
440
441         if (saddr) {
442                 /* direct address match */
443                 return (saddr == addr);
444         } else if (nxi) {
445                 /* match against nx_info */
446                 return addr_in_nx_info(nxi, addr);
447         } else {
448                 /* unrestricted any socket */
449                 return 1;
450         }
451 }
452
453
454 int nx_addr_conflict(struct nx_info *nxi, uint32_t addr, struct sock *sk)
455 {
456         vxdprintk(VXD_CBIT(net, 2),
457                 "nx_addr_conflict(%p,%p) %d.%d,%d.%d",
458                 nxi, sk, VXD_QUAD(addr));
459
460         if (addr) {
461                 /* check real address */
462                 return __addr_in_socket(sk, addr);
463         } else if (nxi) {
464                 /* check against nx_info */
465                 int i, n = nxi->nbipv4;
466
467                 for (i=0; i<n; i++)
468                         if (__addr_in_socket(sk, nxi->ipv4[i]))
469                                 return 1;
470                 return 0;
471         } else {
472                 /* check against any */
473                 return 1;
474         }
475 }
476
477
478 /* vserver syscall commands below here */
479
480 /* task nid and nx_info functions */
481
482 #include <asm/uaccess.h>
483
484
485 int vc_task_nid(uint32_t id, void __user *data)
486 {
487         nid_t nid;
488
489         if (id) {
490                 struct task_struct *tsk;
491
492                 if (!vx_check(0, VX_ADMIN|VX_WATCH))
493                         return -EPERM;
494
495                 read_lock(&tasklist_lock);
496                 tsk = find_task_by_real_pid(id);
497                 nid = (tsk) ? tsk->nid : -ESRCH;
498                 read_unlock(&tasklist_lock);
499         }
500         else
501                 nid = current->nid;
502         return nid;
503 }
504
505
506 int vc_nx_info(uint32_t id, void __user *data)
507 {
508         struct nx_info *nxi;
509         struct vcmd_nx_info_v0 vc_data;
510
511         if (!vx_check(0, VX_ADMIN))
512                 return -ENOSYS;
513         if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
514                 return -EPERM;
515
516         nxi = locate_nx_info(id);
517         if (!nxi)
518                 return -ESRCH;
519
520         vc_data.nid = nxi->nx_id;
521         put_nx_info(nxi);
522
523         if (copy_to_user (data, &vc_data, sizeof(vc_data)))
524                 return -EFAULT;
525         return 0;
526 }
527
528
529 /* network functions */
530
531 int vc_net_create(uint32_t nid, void __user *data)
532 {
533         // int ret = -ENOMEM;
534         struct nx_info *new_nxi;
535         int ret;
536
537         if (!capable(CAP_SYS_ADMIN))
538                 return -EPERM;
539
540         if ((nid >= MIN_D_CONTEXT) && (nid != VX_DYNAMIC_ID))
541                 return -EINVAL;
542
543         if (nid < 1)
544                 return -EINVAL;
545
546         new_nxi = __loc_nx_info(nid, &ret);
547         if (!new_nxi)
548                 return ret;
549         if (!(new_nxi->nx_flags & VXF_STATE_SETUP)) {
550                 ret = -EEXIST;
551                 goto out_put;
552         }
553
554         ret = new_nxi->nx_id;
555         nx_migrate_task(current, new_nxi);
556 out_put:
557         put_nx_info(new_nxi);
558         return ret;
559 }
560
561
562 int vc_net_migrate(uint32_t id, void __user *data)
563 {
564         struct nx_info *nxi;
565
566         if (!capable(CAP_SYS_ADMIN))
567                 return -EPERM;
568
569         nxi = locate_nx_info(id);
570         if (!nxi)
571                 return -ESRCH;
572         nx_migrate_task(current, nxi);
573         put_nx_info(nxi);
574         return 0;
575 }
576
577 int vc_net_add(uint32_t id, void __user *data)
578 {
579         struct nx_info *nxi;
580         struct vcmd_net_nx_v0 vc_data;
581
582         if (!capable(CAP_SYS_ADMIN))
583                 return -EPERM;
584         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
585                 return -EFAULT;
586
587         nxi = locate_nx_info(id);
588         if (!nxi)
589                 return -ESRCH;
590
591         // add ip to net context here
592         put_nx_info(nxi);
593         return 0;
594 }
595
596 int vc_net_remove(uint32_t id, void __user *data)
597 {
598         struct nx_info *nxi;
599         struct vcmd_net_nx_v0 vc_data;
600
601         if (!capable(CAP_SYS_ADMIN))
602                 return -EPERM;
603         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
604                 return -EFAULT;
605
606         nxi = locate_nx_info(id);
607         if (!nxi)
608                 return -ESRCH;
609
610         // rem ip from net context here
611         put_nx_info(nxi);
612         return 0;
613 }
614
615
616
617 int vc_get_nflags(uint32_t id, void __user *data)
618 {
619         struct nx_info *nxi;
620         struct vcmd_net_flags_v0 vc_data;
621
622         if (!capable(CAP_SYS_ADMIN))
623                 return -EPERM;
624
625         nxi = locate_nx_info(id);
626         if (!nxi)
627                 return -ESRCH;
628
629         vc_data.flagword = nxi->nx_flags;
630
631         /* special STATE flag handling */
632         vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, IPF_ONE_TIME);
633
634         put_nx_info(nxi);
635
636         if (copy_to_user (data, &vc_data, sizeof(vc_data)))
637                 return -EFAULT;
638         return 0;
639 }
640
641 int vc_set_nflags(uint32_t id, void __user *data)
642 {
643         struct nx_info *nxi;
644         struct vcmd_net_flags_v0 vc_data;
645         uint64_t mask, trigger;
646
647         if (!capable(CAP_SYS_ADMIN))
648                 return -EPERM;
649         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
650                 return -EFAULT;
651
652         nxi = locate_nx_info(id);
653         if (!nxi)
654                 return -ESRCH;
655
656         /* special STATE flag handling */
657         mask = vx_mask_mask(vc_data.mask, nxi->nx_flags, IPF_ONE_TIME);
658         trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);
659         // if (trigger & IPF_STATE_SETUP)
660
661         nxi->nx_flags = vx_mask_flags(nxi->nx_flags,
662                 vc_data.flagword, mask);
663         put_nx_info(nxi);
664         return 0;
665 }
666
667 int vc_get_ncaps(uint32_t id, void __user *data)
668 {
669         struct nx_info *nxi;
670         struct vcmd_net_caps_v0 vc_data;
671
672         if (!capable(CAP_SYS_ADMIN))
673                 return -EPERM;
674
675         nxi = locate_nx_info(id);
676         if (!nxi)
677                 return -ESRCH;
678
679         vc_data.ncaps = nxi->nx_ncaps;
680         vc_data.cmask = ~0UL;
681         put_nx_info(nxi);
682
683         if (copy_to_user (data, &vc_data, sizeof(vc_data)))
684                 return -EFAULT;
685         return 0;
686 }
687
688 int vc_set_ncaps(uint32_t id, void __user *data)
689 {
690         struct nx_info *nxi;
691         struct vcmd_net_caps_v0 vc_data;
692
693         if (!capable(CAP_SYS_ADMIN))
694                 return -EPERM;
695         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
696                 return -EFAULT;
697
698         nxi = locate_nx_info(id);
699         if (!nxi)
700                 return -ESRCH;
701
702         nxi->nx_ncaps = vx_mask_flags(nxi->nx_ncaps,
703                 vc_data.ncaps, vc_data.cmask);
704         put_nx_info(nxi);
705         return 0;
706 }
707
708
709 #include <linux/module.h>
710
/* nx_info helpers made available to GPL modules */
EXPORT_SYMBOL_GPL(unhash_nx_info);
EXPORT_SYMBOL_GPL(free_nx_info);
713