Merge to Fedora kernel-2.6.7-1.492
[linux-2.6.git] / kernel / vserver / network.c
1 /*
2  *  linux/kernel/vserver/network.c
3  *
4  *  Virtual Server: Network Support
5  *
6  *  Copyright (C) 2003-2004  Herbert Pötzl
7  *
8  *  V0.01  broken out from vcontext V0.05
9  *  V0.02  cleaned up implementation
10  *  V0.03  added equiv nx commands
11  *  V0.04  switch to RCU based hash
12  *
13  */
14
15 #include <linux/config.h>
16 #include <linux/slab.h>
17 #include <linux/vserver.h>
18 #include <linux/vs_base.h>
19 #include <linux/vs_network.h>
20 #include <linux/rcupdate.h>
21
22 #include <asm/errno.h>
23
24
25 /*      __alloc_nx_info()
26
27         * allocate an initialized nx_info struct
28         * doesn't make it visible (hash)                        */
29
30 static struct nx_info *__alloc_nx_info(nid_t nid)
31 {
32         struct nx_info *new = NULL;
33         
34         nxdprintk("alloc_nx_info()\n");
35
36         /* would this benefit from a slab cache? */
37         new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
38         if (!new)
39                 return 0;
40         
41         memset (new, 0, sizeof(struct nx_info));
42         new->nx_id = nid;
43         INIT_RCU_HEAD(&new->nx_rcu);
44         INIT_HLIST_NODE(&new->nx_hlist);
45         atomic_set(&new->nx_refcnt, 0);
46         atomic_set(&new->nx_usecnt, 0);
47
48         /* rest of init goes here */
49         
50         nxdprintk("alloc_nx_info() = %p\n", new);
51         return new;
52 }
53
54 /*      __dealloc_nx_info()
55
56         * final disposal of nx_info                             */
57
58 static void __dealloc_nx_info(struct nx_info *nxi)
59 {
60         nxdprintk("dealloc_nx_info(%p)\n", nxi);
61
62         nxi->nx_hlist.next = LIST_POISON1;
63         nxi->nx_id = -1;
64         
65         BUG_ON(atomic_read(&nxi->nx_usecnt));
66         BUG_ON(atomic_read(&nxi->nx_refcnt));
67
68         kfree(nxi);
69 }
70
71
/*      hash table for nx_info hash */

/* number of buckets; __hashval() reduces a nid modulo this value */
#define NX_HASH_SIZE    13

/* bucket heads of the nid -> nx_info hash */
struct hlist_head nx_info_hash[NX_HASH_SIZE];

/* taken around hash insertion/removal and by __loc_nx_info();
   the lookups in locate_nx_info() and nx_info_is_hashed() run
   under rcu_read_lock() instead.
   NOTE(review): declared static here, yet also passed to
   EXPORT_SYMBOL_GPL at the end of the file -- confirm which
   of the two is intended */
static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED;
79
80
81 static inline unsigned int __hashval(nid_t nid)
82 {
83         return (nid % NX_HASH_SIZE);
84 }
85
86
87
88 /*      __hash_nx_info()
89
90         * add the nxi to the global hash table
91         * requires the hash_lock to be held                     */
92
93 static inline void __hash_nx_info(struct nx_info *nxi)
94 {
95         struct hlist_head *head;
96         
97         nxdprintk("__hash_nx_info: %p[#%d]\n", nxi, nxi->nx_id);
98         get_nx_info(nxi);
99         head = &nx_info_hash[__hashval(nxi->nx_id)];
100         hlist_add_head_rcu(&nxi->nx_hlist, head);
101 }
102
103 /*      __unhash_nx_info()
104
105         * remove the nxi from the global hash table
106         * requires the hash_lock to be held                     */
107
108 static inline void __unhash_nx_info(struct nx_info *nxi)
109 {
110         nxdprintk("__unhash_nx_info: %p[#%d]\n", nxi, nxi->nx_id);
111         hlist_del_rcu(&nxi->nx_hlist);
112         put_nx_info(nxi);
113 }
114
115
116 /*      __lookup_nx_info()
117
118         * requires the rcu_read_lock()
119         * doesn't increment the nx_refcnt                       */
120
121 static inline struct nx_info *__lookup_nx_info(nid_t nid)
122 {
123         struct hlist_head *head = &nx_info_hash[__hashval(nid)];
124         struct hlist_node *pos;
125
126         hlist_for_each_rcu(pos, head) {
127                 struct nx_info *nxi =
128                         hlist_entry(pos, struct nx_info, nx_hlist);
129
130                 if (nxi->nx_id == nid) {
131                         return nxi;
132                 }
133         }
134         return NULL;
135 }
136
137
138 /*      __nx_dynamic_id()
139
140         * find unused dynamic nid
141         * requires the hash_lock to be held                     */
142
143 static inline nid_t __nx_dynamic_id(void)
144 {
145         static nid_t seq = MAX_N_CONTEXT;
146         nid_t barrier = seq;
147         
148         do {
149                 if (++seq > MAX_N_CONTEXT)
150                         seq = MIN_D_CONTEXT;
151                 if (!__lookup_nx_info(seq))
152                         return seq;
153         } while (barrier != seq);
154         return 0;
155 }
156
157 /*      __loc_nx_info()
158
159         * locate or create the requested context
160         * get() it and if new hash it                           */
161
162 static struct nx_info * __loc_nx_info(int id, int *err)
163 {
164         struct nx_info *new, *nxi = NULL;
165         
166         nxdprintk("loc_nx_info(%d)\n", id);
167
168         if (!(new = __alloc_nx_info(id))) {
169                 *err = -ENOMEM;
170                 return NULL;
171         }
172
173         spin_lock(&nx_info_hash_lock);
174
175         /* dynamic context requested */
176         if (id == NX_DYNAMIC_ID) {
177                 id = __nx_dynamic_id();
178                 if (!id) {
179                         printk(KERN_ERR "no dynamic context available.\n");
180                         goto out_unlock;
181                 }
182                 new->nx_id = id;
183         }
184         /* existing context requested */
185         else if ((nxi = __lookup_nx_info(id))) {
186                 /* context in setup is not available */
187                 if (nxi->nx_flags & VXF_STATE_SETUP) {
188                         nxdprintk("loc_nx_info(%d) = %p (not available)\n", id, nxi);
189                         nxi = NULL;
190                         *err = -EBUSY;
191                 } else {
192                         nxdprintk("loc_nx_info(%d) = %p (found)\n", id, nxi);
193                         get_nx_info(nxi);
194                         *err = 0;
195                 }
196                 goto out_unlock;
197         }
198
199         /* new context requested */
200         nxdprintk("loc_nx_info(%d) = %p (new)\n", id, new);
201         __hash_nx_info(get_nx_info(new));
202         nxi = new, new = NULL;
203         *err = 1;
204
205 out_unlock:
206         spin_unlock(&nx_info_hash_lock);
207         if (new)
208                 __dealloc_nx_info(new);
209         return nxi;
210 }
211
212
213
214 /*      exported stuff                                          */
215
216
217
218
219 void rcu_free_nx_info(struct rcu_head *head)
220 {
221         struct nx_info *nxi = container_of(head, struct nx_info, nx_rcu);
222         int usecnt, refcnt;
223
224         BUG_ON(!nxi || !head);
225
226         usecnt = atomic_read(&nxi->nx_usecnt);
227         BUG_ON(usecnt < 0);
228
229         refcnt = atomic_read(&nxi->nx_refcnt);
230         BUG_ON(refcnt < 0);
231
232         if (!usecnt)
233                 __dealloc_nx_info(nxi);
234         else
235                 printk("!!! rcu didn't free\n");
236 }
237
238 void unhash_nx_info(struct nx_info *nxi)
239 {
240         spin_lock(&nx_info_hash_lock);
241         __unhash_nx_info(nxi);
242         spin_unlock(&nx_info_hash_lock);
243 }
244
245 /*      locate_nx_info()
246
247         * search for a nx_info and get() it                     
248         * negative id means current                             */
249
250 struct nx_info *locate_nx_info(int id)
251 {
252         struct nx_info *nxi;
253         
254         if (id < 0) {
255                 nxi = get_nx_info(current->nx_info);
256         } else {
257                 rcu_read_lock();
258                 nxi = get_nx_info(__lookup_nx_info(id));
259                 rcu_read_unlock();
260         }
261         return nxi;
262 }
263
264 /*      nx_info_is_hashed()
265
266         * verify that nid is still hashed                       */
267
268 int nx_info_is_hashed(nid_t nid)
269 {
270         int hashed;
271
272         rcu_read_lock();
273         hashed = (__lookup_nx_info(nid) != NULL);
274         rcu_read_unlock();
275         return hashed;
276 }
277
278 #ifdef  CONFIG_VSERVER_LEGACY
279
/*	locate_or_create_nx_info()

	* legacy wrapper around __loc_nx_info()
	* the status code it reports is discarded, so the caller
	  only sees the context pointer or NULL			*/

struct nx_info *locate_or_create_nx_info(int id)
{
	int unused_err;

	return __loc_nx_info(id, &unused_err);
}
286
287 struct nx_info *create_nx_info(void)
288 {
289         struct nx_info *new;
290         int err;
291         
292         nxdprintk("create_nx_info()\n");
293         if (!(new = __loc_nx_info(NX_DYNAMIC_ID, &err)))
294                 return NULL;
295         return new;
296 }
297
298
299 #endif
300
301 #ifdef  CONFIG_PROC_FS
302
/*	hlist_for_each_rcu()

	* iterate over the nodes of a hlist
	* pos	struct hlist_node pointer used as the cursor
	* head	pointer to the hlist_head to walk
	* each step prefetches the next node and issues
	  smp_read_barrier_depends() after advancing

	__lookup_nx_info() earlier in this file already uses this
	macro, so a definition must exist before this point; only
	define it here if that is not the case.  The arguments are
	parenthesized so that expressions can be passed safely.	*/

#ifndef hlist_for_each_rcu
#define hlist_for_each_rcu(pos, head) \
	for ((pos) = (head)->first; \
	     (pos) && ({ prefetch((pos)->next); 1; }); \
	     (pos) = (pos)->next, ({ smp_read_barrier_depends(); 0; }))
#endif
306
307 int get_nid_list(int index, unsigned int *nids, int size)
308 {
309         int hindex, nr_nids = 0;
310
311         rcu_read_lock();
312         for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
313                 struct hlist_head *head = &nx_info_hash[hindex];
314                 struct hlist_node *pos;
315
316                 hlist_for_each_rcu(pos, head) {
317                         struct nx_info *nxi;
318
319                         if (--index > 0)
320                                 continue;
321
322                         nxi = hlist_entry(pos, struct nx_info, nx_hlist);
323                         nids[nr_nids] = nxi->nx_id;                     
324                         if (++nr_nids >= size)
325                                 goto out;
326                 }
327         }
328 out:
329         rcu_read_unlock();
330         return nr_nids;
331 }
332 #endif
333
334
335 /*
336  *      migrate task to new network
337  */
338
339 int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
340 {
341         struct nx_info *old_nxi;
342         int ret = 0;
343         
344         if (!p || !nxi)
345                 BUG();
346
347         nxdprintk("nx_migrate_task(%p,%p[#%d.%d.%d])\n",
348                 p, nxi, nxi->nx_id,
349                 atomic_read(&nxi->nx_usecnt),
350                 atomic_read(&nxi->nx_refcnt));
351
352         old_nxi = task_get_nx_info(p);
353         if (old_nxi == nxi)
354                 goto out;
355
356         task_lock(p);
357         /* should be handled in set_nx_info !! */
358         if (old_nxi)
359                 clr_nx_info(&p->nx_info);
360         set_nx_info(&p->nx_info, nxi);
361         p->nid = nxi->nx_id;
362         task_unlock(p);
363
364         /* obsoleted by clr/set */
365         // put_nx_info(old_nxi);
366 out:
367         put_nx_info(old_nxi);
368         return ret;
369 }
370
371
372 #include <linux/netdevice.h>
373 #include <linux/inetdevice.h>
374
375 static inline int __addr_in_nx_info(u32 addr, struct nx_info *nxi)
376 {
377         int i, nbip;
378
379         nbip = nxi->nbipv4;
380         for (i=0; i<nbip; i++)
381                 if (nxi->ipv4[i] == addr)
382                         return 1;
383         return 0;
384 }
385
386 int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
387 {
388         if (nxi && ifa)
389                 return __addr_in_nx_info(ifa->ifa_address, nxi);
390         return 1;
391 }
392
393 int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
394 {
395         struct in_device *in_dev = __in_dev_get(dev);
396         struct in_ifaddr **ifap = NULL;
397         struct in_ifaddr *ifa = NULL;
398
399         if (!nxi)
400                 return 1;
401         if (!in_dev)
402                 return 0;
403
404         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
405                 ifap = &ifa->ifa_next) {
406                 if (__addr_in_nx_info(ifa->ifa_address, nxi))
407                         return 1;
408         }
409         return 0;
410 }
411
412
413
414
415 /* vserver syscall commands below here */
416
417 /* task nid and nx_info functions */
418
419 #include <asm/uaccess.h>
420
421
422 int vc_task_nid(uint32_t id, void __user *data)
423 {
424         nid_t nid;
425
426         if (id) {
427                 struct task_struct *tsk;
428
429                 if (!vx_check(0, VX_ADMIN|VX_WATCH))
430                         return -EPERM;
431
432                 read_lock(&tasklist_lock);
433                 tsk = find_task_by_pid(id);
434                 nid = (tsk) ? tsk->nid : -ESRCH;
435                 read_unlock(&tasklist_lock);
436         }
437         else
438                 nid = current->nid;
439         return nid;
440 }
441
442
443 int vc_nx_info(uint32_t id, void __user *data)
444 {
445         struct nx_info *nxi;
446         struct vcmd_nx_info_v0 vc_data;
447
448         if (!vx_check(0, VX_ADMIN))
449                 return -ENOSYS;
450         if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
451                 return -EPERM;
452
453         nxi = locate_nx_info(id);
454         if (!nxi)
455                 return -ESRCH;
456
457         vc_data.nid = nxi->nx_id;
458         put_nx_info(nxi);
459
460         if (copy_to_user (data, &vc_data, sizeof(vc_data)))
461                 return -EFAULT;
462         return 0;
463 }
464
465
466 /* network functions */
467
468 int vc_net_create(uint32_t nid, void __user *data)
469 {
470         // int ret = -ENOMEM;
471         struct nx_info *new_nxi;
472         int ret;
473
474         if (!capable(CAP_SYS_ADMIN))
475                 return -EPERM;
476
477         if ((nid >= MIN_D_CONTEXT) && (nid != VX_DYNAMIC_ID))
478                 return -EINVAL;
479
480         if (nid < 1)
481                 return -EINVAL;
482
483         new_nxi = __loc_nx_info(nid, &ret);
484         if (!new_nxi)
485                 return ret;
486         if (!(new_nxi->nx_flags & VXF_STATE_SETUP)) {
487                 ret = -EEXIST;
488                 goto out_put;
489         }
490
491         ret = new_nxi->nx_id;
492         nx_migrate_task(current, new_nxi);
493 out_put:
494         put_nx_info(new_nxi);
495         return ret;
496 }
497
498
499 int vc_net_migrate(uint32_t id, void __user *data)
500 {
501         struct nx_info *nxi;
502         
503         if (!capable(CAP_SYS_ADMIN))
504                 return -EPERM;
505
506         nxi = locate_nx_info(id);
507         if (!nxi)
508                 return -ESRCH;
509         nx_migrate_task(current, nxi);
510         put_nx_info(nxi);
511         return 0;
512 }
513
514 int vc_net_add(uint32_t id, void __user *data)
515 {
516         struct nx_info *nxi;
517         struct vcmd_net_nx_v0 vc_data;
518
519         if (!capable(CAP_SYS_ADMIN))
520                 return -EPERM;
521         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
522                 return -EFAULT;
523
524         nxi = locate_nx_info(id);
525         if (!nxi)
526                 return -ESRCH;
527
528         // add ip to net context here
529         put_nx_info(nxi);
530         return 0;
531 }
532
533 int vc_net_remove(uint32_t id, void __user *data)
534 {
535         struct nx_info *nxi;
536         struct vcmd_net_nx_v0 vc_data;
537
538         if (!capable(CAP_SYS_ADMIN))
539                 return -EPERM;
540         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
541                 return -EFAULT;
542
543         nxi = locate_nx_info(id);
544         if (!nxi)
545                 return -ESRCH;
546
547         // rem ip from net context here
548         put_nx_info(nxi);
549         return 0;
550 }
551
552
553
554 int vc_get_nflags(uint32_t id, void __user *data)
555 {
556         struct nx_info *nxi;
557         struct vcmd_net_flags_v0 vc_data;
558
559         if (!capable(CAP_SYS_ADMIN))
560                 return -EPERM;
561
562         nxi = locate_nx_info(id);
563         if (!nxi)
564                 return -ESRCH;
565
566         vc_data.flagword = nxi->nx_flags;
567
568         /* special STATE flag handling */
569         vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, IPF_ONE_TIME);
570
571         put_nx_info(nxi);
572
573         if (copy_to_user (data, &vc_data, sizeof(vc_data)))
574                 return -EFAULT;
575         return 0;
576 }
577
578 int vc_set_nflags(uint32_t id, void __user *data)
579 {
580         struct nx_info *nxi;
581         struct vcmd_net_flags_v0 vc_data;
582         uint64_t mask, trigger;
583
584         if (!capable(CAP_SYS_ADMIN))
585                 return -EPERM;
586         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
587                 return -EFAULT;
588
589         nxi = locate_nx_info(id);
590         if (!nxi)
591                 return -ESRCH;
592
593         /* special STATE flag handling */
594         mask = vx_mask_mask(vc_data.mask, nxi->nx_flags, IPF_ONE_TIME);
595         trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);
596         // if (trigger & IPF_STATE_SETUP)
597
598         nxi->nx_flags = vx_mask_flags(nxi->nx_flags,
599                 vc_data.flagword, mask);
600         put_nx_info(nxi);
601         return 0;
602 }
603
604 int vc_get_ncaps(uint32_t id, void __user *data)
605 {
606         struct nx_info *nxi;
607         struct vcmd_net_caps_v0 vc_data;
608
609         if (!capable(CAP_SYS_ADMIN))
610                 return -EPERM;
611
612         nxi = locate_nx_info(id);
613         if (!nxi)
614                 return -ESRCH;
615
616         vc_data.ncaps = nxi->nx_ncaps;
617         vc_data.cmask = ~0UL;
618         put_nx_info(nxi);
619
620         if (copy_to_user (data, &vc_data, sizeof(vc_data)))
621                 return -EFAULT;
622         return 0;
623 }
624
625 int vc_set_ncaps(uint32_t id, void __user *data)
626 {
627         struct nx_info *nxi;
628         struct vcmd_net_caps_v0 vc_data;
629
630         if (!capable(CAP_SYS_ADMIN))
631                 return -EPERM;
632         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
633                 return -EFAULT;
634
635         nxi = locate_nx_info(id);
636         if (!nxi)
637                 return -ESRCH;
638
639         nxi->nx_ncaps = vx_mask_flags(nxi->nx_ncaps,
640                 vc_data.ncaps, vc_data.cmask);
641         put_nx_info(nxi);
642         return 0;
643 }
644
645
646 #include <linux/module.h>
647
/* symbols exported (GPL-only) from this file */
EXPORT_SYMBOL_GPL(rcu_free_nx_info);
/* NOTE(review): nx_info_hash_lock is declared static earlier in
   this file -- confirm that exporting it is intended */
EXPORT_SYMBOL_GPL(nx_info_hash_lock);
EXPORT_SYMBOL_GPL(unhash_nx_info);
651