/*
 *  linux/kernel/vserver/network.c
 *
 *  Virtual Server: Network Support
 *
 *  Copyright (C) 2003-2005  Herbert Pötzl
 *
 *  V0.01  broken out from vcontext V0.05
 *  V0.02  cleaned up implementation
 *  V0.03  added equiv nx commands
 *  V0.04  switch to RCU based hash
 *  V0.05  and back to locking again
 *
 */

#include <linux/slab.h>
#include <linux/vserver/network_cmd.h>
#include <linux/rcupdate.h>
#include <net/tcp.h>

#include <asm/errno.h>


/*      __alloc_nx_info()

        * allocate an initialized nx_info struct
        * doesn't make it visible (hash)                        */

static struct nx_info *__alloc_nx_info(nid_t nid)
{
        struct nx_info *new = NULL;

        vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);

        /* would this benefit from a slab cache? */
        new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
        if (!new)
                return NULL;

        memset(new, 0, sizeof(struct nx_info));
        new->nx_id = nid;
        INIT_HLIST_NODE(&new->nx_hlist);
        atomic_set(&new->nx_usecnt, 0);
        atomic_set(&new->nx_tasks, 0);
        new->nx_state = 0;

        new->nx_flags = NXF_INIT_SET;

        /* rest of init goes here */

        vxdprintk(VXD_CBIT(nid, 0),
                "alloc_nx_info(%d) = %p", nid, new);
        return new;
}

/*      __dealloc_nx_info()

        * final disposal of nx_info                             */

static void __dealloc_nx_info(struct nx_info *nxi)
{
        vxdprintk(VXD_CBIT(nid, 0),
                "dealloc_nx_info(%p)", nxi);

        nxi->nx_hlist.next = LIST_POISON1;
        nxi->nx_id = -1;

        BUG_ON(atomic_read(&nxi->nx_usecnt));
        BUG_ON(atomic_read(&nxi->nx_tasks));

        nxi->nx_state |= NXS_RELEASED;
        kfree(nxi);
}

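/*      __shutdown_nx_info()

        * mark the context as shut down and
        * signal the network down transition                    */
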
static void __shutdown_nx_info(struct nx_info *nxi)
{
        nxi->nx_state |= NXS_SHUTDOWN;
        vs_net_change(nxi, VSC_NETDOWN);
}

/*      exported stuff                                          */

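/*      free_nx_info()

        * final disposal of a context which must already be
        * shut down, unhashed and unreferenced                  */
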
void free_nx_info(struct nx_info *nxi)
{
        /* context shutdown is mandatory */
        BUG_ON(nxi->nx_state != NXS_SHUTDOWN);

        /* context must not be hashed */
        BUG_ON(nxi->nx_state & NXS_HASHED);

        BUG_ON(atomic_read(&nxi->nx_usecnt));
        BUG_ON(atomic_read(&nxi->nx_tasks));

        __dealloc_nx_info(nxi);
}


/*      hash table for nx_info hash */

#define NX_HASH_SIZE    13

struct hlist_head nx_info_hash[NX_HASH_SIZE];

static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED;


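/*      __hashval()

        * map a nid to one of the NX_HASH_SIZE buckets,
        * e.g. nid 42 ends up in bucket 42 % 13 = 3             */
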
static inline unsigned int __hashval(nid_t nid)
{
        return (nid % NX_HASH_SIZE);
}



/*      __hash_nx_info()

        * add the nxi to the global hash table
        * requires the hash_lock to be held                     */

static inline void __hash_nx_info(struct nx_info *nxi)
{
        struct hlist_head *head;

        vxd_assert_lock(&nx_info_hash_lock);
        vxdprintk(VXD_CBIT(nid, 4),
                "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);

        /* context must not be hashed */
        BUG_ON(nx_info_state(nxi, NXS_HASHED));

        nxi->nx_state |= NXS_HASHED;
        head = &nx_info_hash[__hashval(nxi->nx_id)];
        hlist_add_head(&nxi->nx_hlist, head);
}

/*      __unhash_nx_info()

        * remove the nxi from the global hash table
        * requires the hash_lock to be held                     */

static inline void __unhash_nx_info(struct nx_info *nxi)
{
        vxd_assert_lock(&nx_info_hash_lock);
        vxdprintk(VXD_CBIT(nid, 4),
                "__unhash_nx_info: %p[#%d]", nxi, nxi->nx_id);

        /* context must be hashed */
        BUG_ON(!nx_info_state(nxi, NXS_HASHED));

        nxi->nx_state &= ~NXS_HASHED;
        hlist_del(&nxi->nx_hlist);
}


/*      __lookup_nx_info()

        * requires the hash_lock to be held
        * doesn't increment the nx_refcnt                       */

static inline struct nx_info *__lookup_nx_info(nid_t nid)
{
        struct hlist_head *head = &nx_info_hash[__hashval(nid)];
        struct hlist_node *pos;
        struct nx_info *nxi;

        vxd_assert_lock(&nx_info_hash_lock);
        hlist_for_each(pos, head) {
                nxi = hlist_entry(pos, struct nx_info, nx_hlist);

                if (nxi->nx_id == nid)
                        goto found;
        }
        nxi = NULL;
found:
        vxdprintk(VXD_CBIT(nid, 0),
                "__lookup_nx_info(#%u): %p[#%u]",
                nid, nxi, nxi?nxi->nx_id:0);
        return nxi;
}


/*      __nx_dynamic_id()

        * find unused dynamic nid
        * requires the hash_lock to be held                     */

static inline nid_t __nx_dynamic_id(void)
{
        static nid_t seq = MAX_N_CONTEXT;
        nid_t barrier = seq;

        vxd_assert_lock(&nx_info_hash_lock);
        do {
                if (++seq > MAX_N_CONTEXT)
                        seq = MIN_D_CONTEXT;
                if (!__lookup_nx_info(seq)) {
                        vxdprintk(VXD_CBIT(nid, 4),
                                "__nx_dynamic_id: [#%d]", seq);
                        return seq;
                }
        } while (barrier != seq);
        return 0;
}

/*      __create_nx_info()

        * create the requested context
        * get() and hash it                                     */

static struct nx_info *__create_nx_info(int id)
{
        struct nx_info *new, *nxi = NULL;

        vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id);

        if (!(new = __alloc_nx_info(id)))
                return ERR_PTR(-ENOMEM);

        /* required to make dynamic nids unique */
        spin_lock(&nx_info_hash_lock);

        /* dynamic context requested */
        if (id == NX_DYNAMIC_ID) {
                id = __nx_dynamic_id();
                if (!id) {
                        printk(KERN_ERR "no dynamic context available.\n");
                        nxi = ERR_PTR(-EAGAIN);
                        goto out_unlock;
                }
                new->nx_id = id;
        }
        /* static context requested */
        else if ((nxi = __lookup_nx_info(id))) {
                vxdprintk(VXD_CBIT(nid, 0),
                        "create_nx_info(%d) = %p (already there)", id, nxi);
                if (nx_info_flags(nxi, NXF_STATE_SETUP, 0))
                        nxi = ERR_PTR(-EBUSY);
                else
                        nxi = ERR_PTR(-EEXIST);
                goto out_unlock;
        }
        /* dynamic nid creation blocker */
        else if (id >= MIN_D_CONTEXT) {
                vxdprintk(VXD_CBIT(nid, 0),
                        "create_nx_info(%d) (dynamic rejected)", id);
                nxi = ERR_PTR(-EINVAL);
                goto out_unlock;
        }

        /* new context */
        vxdprintk(VXD_CBIT(nid, 0),
                "create_nx_info(%d) = %p (new)", id, new);
        __hash_nx_info(get_nx_info(new));
        nxi = new, new = NULL;

out_unlock:
        spin_unlock(&nx_info_hash_lock);
        if (new)
                __dealloc_nx_info(new);
        return nxi;
}



/*      exported stuff                                          */


void unhash_nx_info(struct nx_info *nxi)
{
        __shutdown_nx_info(nxi);
        spin_lock(&nx_info_hash_lock);
        __unhash_nx_info(nxi);
        spin_unlock(&nx_info_hash_lock);
}

#ifdef  CONFIG_VSERVER_LEGACYNET

struct nx_info *create_nx_info(void)
{
        return __create_nx_info(NX_DYNAMIC_ID);
}

#endif

/*      lookup_nx_info()

        * search for a nx_info and get() it
        * negative id means current                             */

struct nx_info *lookup_nx_info(int id)
{
        struct nx_info *nxi = NULL;

        if (id < 0) {
                nxi = get_nx_info(current->nx_info);
        } else if (id > 1) {
                spin_lock(&nx_info_hash_lock);
                nxi = get_nx_info(__lookup_nx_info(id));
                spin_unlock(&nx_info_hash_lock);
        }
        return nxi;
}

/*      nid_is_hashed()

        * verify that nid is still hashed                       */

int nid_is_hashed(nid_t nid)
{
        int hashed;

        spin_lock(&nx_info_hash_lock);
        hashed = (__lookup_nx_info(nid) != NULL);
        spin_unlock(&nx_info_hash_lock);
        return hashed;
}


#ifdef  CONFIG_PROC_FS

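/*      get_nid_list()

        * collect up to size context ids into the nids
        * array, skipping the first index entries               */
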
int get_nid_list(int index, unsigned int *nids, int size)
{
        int hindex, nr_nids = 0;

        for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
                struct hlist_head *head = &nx_info_hash[hindex];
                struct hlist_node *pos;

                spin_lock(&nx_info_hash_lock);
                hlist_for_each(pos, head) {
                        struct nx_info *nxi;

                        if (--index > 0)
                                continue;

                        nxi = hlist_entry(pos, struct nx_info, nx_hlist);
                        nids[nr_nids] = nxi->nx_id;
                        if (++nr_nids >= size) {
                                spin_unlock(&nx_info_hash_lock);
                                goto out;
                        }
                }
                /* keep the lock time short */
                spin_unlock(&nx_info_hash_lock);
        }
out:
        return nr_nids;
}
#endif


/*
 *      migrate task to new network
 *      gets nxi, puts old_nxi on change
 */

int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
{
        struct nx_info *old_nxi;
        int ret = 0;

        if (!p || !nxi)
                BUG();

        vxdprintk(VXD_CBIT(nid, 5),
                "nx_migrate_task(%p,%p[#%d.%d.%d])",
                p, nxi, nxi->nx_id,
                atomic_read(&nxi->nx_usecnt),
                atomic_read(&nxi->nx_tasks));

        /* maybe disallow this completely? */
        old_nxi = task_get_nx_info(p);
        if (old_nxi == nxi)
                goto out;

        task_lock(p);
        if (old_nxi)
                clr_nx_info(&p->nx_info);
        claim_nx_info(nxi, p);
        set_nx_info(&p->nx_info, nxi);
        p->nid = nxi->nx_id;
        task_unlock(p);

        vxdprintk(VXD_CBIT(nid, 5),
                "moved task %p into nxi:%p[#%d]",
                p, nxi, nxi->nx_id);

        if (old_nxi)
                release_nx_info(old_nxi, p);
out:
        put_nx_info(old_nxi);
        return ret;
}


#ifdef CONFIG_INET

#include <linux/netdevice.h>
#include <linux/inetdevice.h>

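/*      ifa_in_nx_info()

        * check whether an interface address is visible
        * in the given network context                          */
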
int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
{
        if (!nxi)
                return 1;
        if (!ifa)
                return 0;
        return addr_in_nx_info(nxi, ifa->ifa_local);
}

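/*      dev_in_nx_info()

        * check whether any address configured on the
        * device belongs to the network context                 */
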
int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
{
        struct in_device *in_dev;
        struct in_ifaddr **ifap;
        struct in_ifaddr *ifa;
        int ret = 0;

        if (!nxi)
                return 1;

        in_dev = in_dev_get(dev);
        if (!in_dev)
                goto out;

        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
                ifap = &ifa->ifa_next) {
                if (addr_in_nx_info(nxi, ifa->ifa_local)) {
                        ret = 1;
                        break;
                }
        }
        in_dev_put(in_dev);
out:
        return ret;
}

/*
 *      check if address is covered by socket
 *
 *      sk:     the socket to check against
 *      addr:   the address in question (must be != 0)
 */
static inline int __addr_in_socket(struct sock *sk, uint32_t addr)
{
        struct nx_info *nxi = sk->sk_nx_info;
        uint32_t saddr = inet_rcv_saddr(sk);

        vxdprintk(VXD_CBIT(net, 5),
                "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx",
                sk, VXD_QUAD(addr), nxi, VXD_QUAD(saddr), sk->sk_socket,
                (sk->sk_socket?sk->sk_socket->flags:0));

        if (saddr) {
                /* direct address match */
                return (saddr == addr);
        } else if (nxi) {
                /* match against nx_info */
                return addr_in_nx_info(nxi, addr);
        } else {
                /* unrestricted any socket */
                return 1;
        }
}


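/*      nx_addr_conflict()

        * check whether addr (or, for addr == 0, any address
        * assigned to the context) is covered by the socket     */
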
int nx_addr_conflict(struct nx_info *nxi, uint32_t addr, struct sock *sk)
{
        vxdprintk(VXD_CBIT(net, 2),
                "nx_addr_conflict(%p,%p) %d.%d.%d.%d",
                nxi, sk, VXD_QUAD(addr));

        if (addr) {
                /* check real address */
                return __addr_in_socket(sk, addr);
        } else if (nxi) {
                /* check against nx_info */
                int i, n = nxi->nbipv4;

                for (i = 0; i < n; i++)
                        if (__addr_in_socket(sk, nxi->ipv4[i]))
                                return 1;
                return 0;
        } else {
                /* check against any */
                return 1;
        }
}

#endif /* CONFIG_INET */

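/*      nx_set_persistent()

        * take or drop the extra reference that keeps a
        * context around, based on NXF_PERSISTENT               */
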
void nx_set_persistent(struct nx_info *nxi)
{
        if (nx_info_flags(nxi, NXF_PERSISTENT, 0)) {
                get_nx_info(nxi);
                claim_nx_info(nxi, current);
        } else {
                release_nx_info(nxi, current);
                put_nx_info(nxi);
        }
}

/* vserver syscall commands below here */

/* task nid and nx_info functions */

#include <asm/uaccess.h>


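/*      vc_task_nid()

        * return the network context id of the task given
        * by id, or of the current task for id == 0             */
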
int vc_task_nid(uint32_t id, void __user *data)
{
        nid_t nid;

        if (id) {
                struct task_struct *tsk;

                if (!vx_check(0, VX_ADMIN|VX_WATCH))
                        return -EPERM;

                read_lock(&tasklist_lock);
                tsk = find_task_by_real_pid(id);
                nid = (tsk) ? tsk->nid : -ESRCH;
                read_unlock(&tasklist_lock);
        }
        else
                nid = nx_current_nid();
        return nid;
}


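/*      vc_nx_info()

        * copy basic context information (currently only
        * the nid) to userspace                                 */
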
int vc_nx_info(uint32_t id, void __user *data)
{
        struct nx_info *nxi;
        struct vcmd_nx_info_v0 vc_data;

        if (!vx_check(0, VX_ADMIN))
                return -ENOSYS;
        if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
                return -EPERM;

        nxi = lookup_nx_info(id);
        if (!nxi)
                return -ESRCH;

        vc_data.nid = nxi->nx_id;
        put_nx_info(nxi);

        if (copy_to_user(data, &vc_data, sizeof(vc_data)))
                return -EFAULT;
        return 0;
}


/* network functions */

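/*      vc_net_create()

        * create a new network context with the requested
        * nid and flags and migrate the current task into it    */
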
int vc_net_create(uint32_t nid, void __user *data)
{
        struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET };
        struct nx_info *new_nxi;
        int ret;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        if ((nid > MAX_S_CONTEXT) && (nid != VX_DYNAMIC_ID))
                return -EINVAL;
        if (nid < 2)
                return -EINVAL;

        new_nxi = __create_nx_info(nid);
        if (IS_ERR(new_nxi))
                return PTR_ERR(new_nxi);

        /* initial flags */
        new_nxi->nx_flags = vc_data.flagword;

        /* get a reference for persistent contexts */
        if ((vc_data.flagword & NXF_PERSISTENT))
                nx_set_persistent(new_nxi);

        vs_net_change(new_nxi, VSC_NETUP);
        ret = new_nxi->nx_id;
        nx_migrate_task(current, new_nxi);
        /* if this fails, we might end up with a hashed nx_info */
        put_nx_info(new_nxi);
        return ret;
}


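/*      vc_net_migrate()

        * move the current task into an existing
        * network context                                       */
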
int vc_net_migrate(uint32_t id, void __user *data)
{
        struct nx_info *nxi;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        nxi = lookup_nx_info(id);
        if (!nxi)
                return -ESRCH;
        nx_migrate_task(current, nxi);
        put_nx_info(nxi);
        return 0;
}

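/*      vc_net_add()

        * add IPv4 addresses (or the broadcast address)
        * to an existing network context                        */
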
int vc_net_add(uint32_t nid, void __user *data)
{
        struct vcmd_net_addr_v0 vc_data;
        struct nx_info *nxi;
        int index, pos, ret = 0;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        switch (vc_data.type) {
        case NXA_TYPE_IPV4:
                if ((vc_data.count < 1) || (vc_data.count > 4))
                        return -EINVAL;
                break;

        default:
                break;
        }

        nxi = lookup_nx_info(nid);
        if (!nxi)
                return -ESRCH;

        switch (vc_data.type) {
        case NXA_TYPE_IPV4:
                index = 0;
                while ((index < vc_data.count) &&
                        ((pos = nxi->nbipv4) < NB_IPV4ROOT)) {
                        nxi->ipv4[pos] = vc_data.ip[index];
                        nxi->mask[pos] = vc_data.mask[index];
                        index++;
                        nxi->nbipv4++;
                }
                ret = index;
                break;

        case NXA_TYPE_IPV4|NXA_MOD_BCAST:
                nxi->v4_bcast = vc_data.ip[0];
                ret = 1;
                break;

        default:
                ret = -EINVAL;
                break;
        }

        put_nx_info(nxi);
        return ret;
}

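/*      vc_net_remove()

        * remove addresses from a network context
        * (currently only NXA_TYPE_ANY, which clears all)       */
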
int vc_net_remove(uint32_t nid, void __user *data)
{
        struct vcmd_net_addr_v0 vc_data;
        struct nx_info *nxi;
        int ret = 0;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        nxi = lookup_nx_info(nid);
        if (!nxi)
                return -ESRCH;

        switch ((unsigned)vc_data.type) {
        case NXA_TYPE_ANY:
                nxi->nbipv4 = 0;
                break;

        default:
                ret = -EINVAL;
                break;
        }

        put_nx_info(nxi);
        return ret;
}

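/*      vc_get_nflags()

        * copy the network context flag word and its
        * mask to userspace                                     */
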
int vc_get_nflags(uint32_t id, void __user *data)
{
        struct nx_info *nxi;
        struct vcmd_net_flags_v0 vc_data;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        nxi = lookup_nx_info(id);
        if (!nxi)
                return -ESRCH;

        vc_data.flagword = nxi->nx_flags;

        /* special STATE flag handling */
        vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, NXF_ONE_TIME);

        put_nx_info(nxi);

        if (copy_to_user(data, &vc_data, sizeof(vc_data)))
                return -EFAULT;
        return 0;
}

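/*      vc_set_nflags()

        * update the network context flags under the given
        * mask, triggering persistence changes as needed        */
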
int vc_set_nflags(uint32_t id, void __user *data)
{
        struct nx_info *nxi;
        struct vcmd_net_flags_v0 vc_data;
        uint64_t mask, trigger;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        if (copy_from_user(&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        nxi = lookup_nx_info(id);
        if (!nxi)
                return -ESRCH;

        /* special STATE flag handling */
        mask = vx_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME);
        trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);

        nxi->nx_flags = vx_mask_flags(nxi->nx_flags,
                vc_data.flagword, mask);
        if (trigger & NXF_PERSISTENT)
                nx_set_persistent(nxi);

        put_nx_info(nxi);
        return 0;
}

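/*      vc_get_ncaps()

        * copy the network capability set to userspace          */
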
int vc_get_ncaps(uint32_t id, void __user *data)
{
        struct nx_info *nxi;
        struct vcmd_net_caps_v0 vc_data;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        nxi = lookup_nx_info(id);
        if (!nxi)
                return -ESRCH;

        vc_data.ncaps = nxi->nx_ncaps;
        vc_data.cmask = ~0UL;
        put_nx_info(nxi);

        if (copy_to_user(data, &vc_data, sizeof(vc_data)))
                return -EFAULT;
        return 0;
}

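/*      vc_set_ncaps()

        * update the network capability set under the
        * given capability mask                                 */
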
int vc_set_ncaps(uint32_t id, void __user *data)
{
        struct nx_info *nxi;
        struct vcmd_net_caps_v0 vc_data;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        if (copy_from_user(&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        nxi = lookup_nx_info(id);
        if (!nxi)
                return -ESRCH;

        nxi->nx_ncaps = vx_mask_flags(nxi->nx_ncaps,
                vc_data.ncaps, vc_data.cmask);
        put_nx_info(nxi);
        return 0;
}


#include <linux/module.h>

EXPORT_SYMBOL_GPL(free_nx_info);
EXPORT_SYMBOL_GPL(unhash_nx_info);