/*
 *  linux/kernel/vserver/context.c
 *
 *  Virtual Server: Context Support
 *
 *  Copyright (C) 2003-2007  Herbert Pötzl
 *
 *  V0.01  context helper
 *  V0.02  vx_ctx_kill syscall command
 *  V0.03  replaced context_info calls
 *  V0.04  redesign of struct (de)alloc
 *  V0.05  rlimit basic implementation
 *  V0.06  task_xid and info commands
 *  V0.07  context flags and caps
 *  V0.08  switch to RCU based hash
 *  V0.09  revert to non RCU for now
 *  V0.10  and back to working RCU hash
 *  V0.11  and back to locking again
 *  V0.12  referenced context store
 *  V0.13  separate per cpu data
 *  V0.14  changed vcmds to vxi arg
 *  V0.15  added context stat
 *  V0.16  have __create claim() the vxi
 *
 */

#include <linux/slab.h>
#include <linux/types.h>
#include <linux/mnt_namespace.h>
#include <linux/pid_namespace.h>

#include <linux/sched.h>
#include <linux/vserver/context.h>
#include <linux/vserver/network.h>
#include <linux/vserver/legacy.h>
#include <linux/vserver/debug.h>
#include <linux/vserver/limit.h>
#include <linux/vserver/limit_int.h>
#include <linux/vserver/space.h>

#include <linux/vs_context.h>
#include <linux/vs_limit.h>
#include <linux/vserver/context_cmd.h>

#include <linux/err.h>
#include <asm/errno.h>

#include "cvirt_init.h"
#include "cacct_init.h"
#include "limit_init.h"
#include "sched_init.h"


atomic_t vx_global_ctotal       = ATOMIC_INIT(0);
atomic_t vx_global_cactive      = ATOMIC_INIT(0);


/*      list of now inactive context structures */

static struct hlist_head vx_info_inactive = HLIST_HEAD_INIT;

static spinlock_t vx_info_inactive_lock = SPIN_LOCK_UNLOCKED;


/*      __alloc_vx_info()

        * allocate an initialized vx_info struct
        * doesn't make it visible (hash)                        */

static struct vx_info *__alloc_vx_info(xid_t xid)
{
        struct vx_info *new = NULL;
        int cpu;

        vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid);

        /* would this benefit from a slab cache? */
        new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
        if (!new)
                return NULL;

        memset(new, 0, sizeof(struct vx_info));
#ifdef CONFIG_SMP
        new->ptr_pc = alloc_percpu(struct _vx_info_pc);
        if (!new->ptr_pc)
                goto error;
#endif
        new->vx_id = xid;
        INIT_HLIST_NODE(&new->vx_hlist);
        atomic_set(&new->vx_usecnt, 0);
        atomic_set(&new->vx_tasks, 0);
        new->vx_parent = NULL;
        new->vx_state = 0;
        init_waitqueue_head(&new->vx_wait);

        /* prepare reaper */
        get_task_struct(init_pid_ns.child_reaper);
        new->vx_reaper = init_pid_ns.child_reaper;

        /* rest of init goes here */
        vx_info_init_limit(&new->limit);
        vx_info_init_sched(&new->sched);
        vx_info_init_cvirt(&new->cvirt);
        vx_info_init_cacct(&new->cacct);

        /* per cpu data structures */
        for_each_possible_cpu(cpu) {
                vx_info_init_sched_pc(
                        &vx_per_cpu(new, sched_pc, cpu), cpu);
                vx_info_init_cvirt_pc(
                        &vx_per_cpu(new, cvirt_pc, cpu), cpu);
        }

        new->vx_flags = VXF_INIT_SET;
        new->vx_bcaps = CAP_INIT_EFF_SET;
        new->vx_ccaps = 0;
        new->vx_cap_bset = cap_bset;

        new->reboot_cmd = 0;
        new->exit_code = 0;

        vxdprintk(VXD_CBIT(xid, 0),
                "alloc_vx_info(%d) = %p", xid, new);
        vxh_alloc_vx_info(new);
        atomic_inc(&vx_global_ctotal);
        return new;
#ifdef CONFIG_SMP
error:
        kfree(new);
        return NULL;
#endif
}

/*      __dealloc_vx_info()

        * final disposal of vx_info                             */

static void __dealloc_vx_info(struct vx_info *vxi)
{
        int cpu;

        vxdprintk(VXD_CBIT(xid, 0),
                "dealloc_vx_info(%p)", vxi);
        vxh_dealloc_vx_info(vxi);

        vxi->vx_id = -1;

        vx_info_exit_limit(&vxi->limit);
        vx_info_exit_sched(&vxi->sched);
        vx_info_exit_cvirt(&vxi->cvirt);
        vx_info_exit_cacct(&vxi->cacct);

        for_each_possible_cpu(cpu) {
                vx_info_exit_sched_pc(
                        &vx_per_cpu(vxi, sched_pc, cpu), cpu);
                vx_info_exit_cvirt_pc(
                        &vx_per_cpu(vxi, cvirt_pc, cpu), cpu);
        }

        vxi->vx_state |= VXS_RELEASED;

#ifdef CONFIG_SMP
        free_percpu(vxi->ptr_pc);
#endif
        kfree(vxi);
        atomic_dec(&vx_global_ctotal);
}

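/*      __shutdown_vx_info()

        * mark the context as shut down and drop its
        * nsproxy and fs references (may sleep)                 */
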
static void __shutdown_vx_info(struct vx_info *vxi)
{
        struct nsproxy *nsproxy;
        struct fs_struct *fs;

        might_sleep();

        vxi->vx_state |= VXS_SHUTDOWN;
        vs_state_change(vxi, VSC_SHUTDOWN);

        nsproxy = xchg(&vxi->vx_nsproxy, NULL);
        fs = xchg(&vxi->vx_fs, NULL);

        if (nsproxy)
                put_nsproxy(nsproxy);
        if (fs)
                put_fs_struct(fs);
}

/* exported stuff */

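/*      free_vx_info()

        * final put of a context: unlink it from the inactive
        * list and dispose of it; the context must already be
        * shut down, unhashed and without tasks or references   */
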
void free_vx_info(struct vx_info *vxi)
{
        unsigned long flags;

        /* context shutdown is mandatory */
        BUG_ON(!vx_info_state(vxi, VXS_SHUTDOWN));

        BUG_ON(atomic_read(&vxi->vx_usecnt));
        BUG_ON(atomic_read(&vxi->vx_tasks));

        BUG_ON(vx_info_state(vxi, VXS_HASHED));

        BUG_ON(vxi->vx_nsproxy);
        BUG_ON(vxi->vx_fs);

        spin_lock_irqsave(&vx_info_inactive_lock, flags);
        hlist_del(&vxi->vx_hlist);
        spin_unlock_irqrestore(&vx_info_inactive_lock, flags);

        __dealloc_vx_info(vxi);
}


/*      hash table for vx_info hash */

#define VX_HASH_SIZE    13

static struct hlist_head vx_info_hash[VX_HASH_SIZE] =
        { [0 ... VX_HASH_SIZE-1] = HLIST_HEAD_INIT };

static spinlock_t vx_info_hash_lock = SPIN_LOCK_UNLOCKED;


static inline unsigned int __hashval(xid_t xid)
{
        return (xid % VX_HASH_SIZE);
}



/*      __hash_vx_info()

        * add the vxi to the global hash table
        * requires the hash_lock to be held                     */

static inline void __hash_vx_info(struct vx_info *vxi)
{
        struct hlist_head *head;

        vxd_assert_lock(&vx_info_hash_lock);
        vxdprintk(VXD_CBIT(xid, 4),
                "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id);
        vxh_hash_vx_info(vxi);

        /* context must not be hashed */
        BUG_ON(vx_info_state(vxi, VXS_HASHED));

        vxi->vx_state |= VXS_HASHED;
        head = &vx_info_hash[__hashval(vxi->vx_id)];
        hlist_add_head(&vxi->vx_hlist, head);
        atomic_inc(&vx_global_cactive);
}

/*      __unhash_vx_info()

        * remove the vxi from the global hash table
        * requires the hash_lock to be held                     */

static inline void __unhash_vx_info(struct vx_info *vxi)
{
        unsigned long flags;

        vxd_assert_lock(&vx_info_hash_lock);
        vxdprintk(VXD_CBIT(xid, 4),
                "__unhash_vx_info: %p[#%d.%d.%d]", vxi, vxi->vx_id,
                atomic_read(&vxi->vx_usecnt), atomic_read(&vxi->vx_tasks));
        vxh_unhash_vx_info(vxi);

        /* context must be hashed */
        BUG_ON(!vx_info_state(vxi, VXS_HASHED));
        /* but without tasks */
        BUG_ON(atomic_read(&vxi->vx_tasks));

        vxi->vx_state &= ~VXS_HASHED;
        hlist_del_init(&vxi->vx_hlist);
        spin_lock_irqsave(&vx_info_inactive_lock, flags);
        hlist_add_head(&vxi->vx_hlist, &vx_info_inactive);
        spin_unlock_irqrestore(&vx_info_inactive_lock, flags);
        atomic_dec(&vx_global_cactive);
}


/*      __lookup_vx_info()

        * requires the hash_lock to be held
        * doesn't increment the vx_refcnt                       */

static inline struct vx_info *__lookup_vx_info(xid_t xid)
{
        struct hlist_head *head = &vx_info_hash[__hashval(xid)];
        struct hlist_node *pos;
        struct vx_info *vxi;

        vxd_assert_lock(&vx_info_hash_lock);
        hlist_for_each(pos, head) {
                vxi = hlist_entry(pos, struct vx_info, vx_hlist);

                if (vxi->vx_id == xid)
                        goto found;
        }
        vxi = NULL;
found:
        vxdprintk(VXD_CBIT(xid, 0),
                "__lookup_vx_info(#%u): %p[#%u]",
                xid, vxi, vxi ? vxi->vx_id : 0);
        vxh_lookup_vx_info(vxi, xid);
        return vxi;
}


/*      __vx_dynamic_id()

        * find unused dynamic xid
        * requires the hash_lock to be held                     */

static inline xid_t __vx_dynamic_id(void)
{
        static xid_t seq = MAX_S_CONTEXT;
        xid_t barrier = seq;

        vxd_assert_lock(&vx_info_hash_lock);
        do {
                if (++seq > MAX_S_CONTEXT)
                        seq = MIN_D_CONTEXT;
                if (!__lookup_vx_info(seq)) {
                        vxdprintk(VXD_CBIT(xid, 4),
                                "__vx_dynamic_id: [#%d]", seq);
                        return seq;
                }
        } while (barrier != seq);
        return 0;
}

#ifdef  CONFIG_VSERVER_LEGACY

/*      __loc_vx_info()

        * locate or create the requested context
        * get() it and if new hash it                           */

static struct vx_info *__loc_vx_info(int id, int *err)
{
        struct vx_info *new, *vxi = NULL;

        vxdprintk(VXD_CBIT(xid, 1), "loc_vx_info(%d)*", id);

        if (!(new = __alloc_vx_info(id))) {
                *err = -ENOMEM;
                return NULL;
        }

        /* required to make dynamic xids unique */
        spin_lock(&vx_info_hash_lock);

        /* dynamic context requested */
        if (id == VX_DYNAMIC_ID) {
#ifdef  CONFIG_VSERVER_DYNAMIC_IDS
                id = __vx_dynamic_id();
                if (!id) {
                        printk(KERN_ERR "no dynamic context available.\n");
                        *err = -EAGAIN;
                        goto out_unlock;
                }
                new->vx_id = id;
#else
                printk(KERN_ERR "dynamic contexts disabled.\n");
                *err = -EINVAL;
                goto out_unlock;
#endif
        }
        /* existing context requested */
        else if ((vxi = __lookup_vx_info(id))) {
                /* context in setup is not available */
                if (vxi->vx_flags & VXF_STATE_SETUP) {
                        vxdprintk(VXD_CBIT(xid, 0),
                                "loc_vx_info(%d) = %p (not available)", id, vxi);
                        vxi = NULL;
                        *err = -EBUSY;
                } else {
                        vxdprintk(VXD_CBIT(xid, 0),
                                "loc_vx_info(%d) = %p (found)", id, vxi);
                        get_vx_info(vxi);
                        *err = 0;
                }
                goto out_unlock;
        }

        /* new context requested */
        vxdprintk(VXD_CBIT(xid, 0),
                "loc_vx_info(%d) = %p (new)", id, new);
        __hash_vx_info(get_vx_info(new));
        vxi = new, new = NULL;
        *err = 1;

out_unlock:
        spin_unlock(&vx_info_hash_lock);
        vxh_loc_vx_info(vxi, id);
        if (new)
                __dealloc_vx_info(new);
        return vxi;
}

#endif

/*      __create_vx_info()

        * create the requested context
        * get(), claim() and hash it                            */

static struct vx_info *__create_vx_info(int id)
{
        struct vx_info *new, *vxi = NULL;

        vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id);

        if (!(new = __alloc_vx_info(id)))
                return ERR_PTR(-ENOMEM);

        /* required to make dynamic xids unique */
        spin_lock(&vx_info_hash_lock);

        /* dynamic context requested */
        if (id == VX_DYNAMIC_ID) {
#ifdef  CONFIG_VSERVER_DYNAMIC_IDS
                id = __vx_dynamic_id();
                if (!id) {
                        printk(KERN_ERR "no dynamic context available.\n");
                        vxi = ERR_PTR(-EAGAIN);
                        goto out_unlock;
                }
                new->vx_id = id;
#else
                printk(KERN_ERR "dynamic contexts disabled.\n");
                vxi = ERR_PTR(-EINVAL);
                goto out_unlock;
#endif
        }
        /* static context requested */
        else if ((vxi = __lookup_vx_info(id))) {
                vxdprintk(VXD_CBIT(xid, 0),
                        "create_vx_info(%d) = %p (already there)", id, vxi);
                if (vx_info_flags(vxi, VXF_STATE_SETUP, 0))
                        vxi = ERR_PTR(-EBUSY);
                else
                        vxi = ERR_PTR(-EEXIST);
                goto out_unlock;
        }
#ifdef  CONFIG_VSERVER_DYNAMIC_IDS
        /* dynamic xid creation blocker */
        else if (id >= MIN_D_CONTEXT) {
                vxdprintk(VXD_CBIT(xid, 0),
                        "create_vx_info(%d) (dynamic rejected)", id);
                vxi = ERR_PTR(-EINVAL);
                goto out_unlock;
        }
#endif

        /* new context */
        vxdprintk(VXD_CBIT(xid, 0),
                "create_vx_info(%d) = %p (new)", id, new);
        claim_vx_info(new, NULL);
        __hash_vx_info(get_vx_info(new));
        vxi = new, new = NULL;

out_unlock:
        spin_unlock(&vx_info_hash_lock);
        vxh_create_vx_info(IS_ERR(vxi) ? NULL : vxi, id);
        if (new)
                __dealloc_vx_info(new);
        return vxi;
}


/*      exported stuff                                          */


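/*      unhash_vx_info()

        * shut the context down, take it out of the global
        * hash and wake up any waiters                          */
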
void unhash_vx_info(struct vx_info *vxi)
{
        __shutdown_vx_info(vxi);
        spin_lock(&vx_info_hash_lock);
        __unhash_vx_info(vxi);
        spin_unlock(&vx_info_hash_lock);
        __wakeup_vx_info(vxi);
}


/*      lookup_vx_info()

        * search for a vx_info and get() it
        * negative id means current                             */

struct vx_info *lookup_vx_info(int id)
{
        struct vx_info *vxi = NULL;

        if (id < 0) {
                vxi = get_vx_info(current->vx_info);
        } else if (id > 1) {
                spin_lock(&vx_info_hash_lock);
                vxi = get_vx_info(__lookup_vx_info(id));
                spin_unlock(&vx_info_hash_lock);
        }
        return vxi;
}

/*      xid_is_hashed()

        * verify that xid is still hashed                       */

int xid_is_hashed(xid_t xid)
{
        int hashed;

        spin_lock(&vx_info_hash_lock);
        hashed = (__lookup_vx_info(xid) != NULL);
        spin_unlock(&vx_info_hash_lock);
        return hashed;
}

#ifdef  CONFIG_VSERVER_LEGACY

struct vx_info *lookup_or_create_vx_info(int id)
{
        int err;

        return __loc_vx_info(id, &err);
}

#endif

#ifdef  CONFIG_PROC_FS

/*      get_xid_list()

        * get a subset of hashed xids for proc
        * assumes size is at least one                          */

int get_xid_list(int index, unsigned int *xids, int size)
{
        int hindex, nr_xids = 0;

        /* only show current and children */
        if (!vx_check(0, VS_ADMIN|VS_WATCH)) {
                if (index > 0)
                        return 0;
                xids[nr_xids] = vx_current_xid();
                return 1;
        }

        for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) {
                struct hlist_head *head = &vx_info_hash[hindex];
                struct hlist_node *pos;

                spin_lock(&vx_info_hash_lock);
                hlist_for_each(pos, head) {
                        struct vx_info *vxi;

                        if (--index > 0)
                                continue;

                        vxi = hlist_entry(pos, struct vx_info, vx_hlist);
                        xids[nr_xids] = vxi->vx_id;
                        if (++nr_xids >= size) {
                                spin_unlock(&vx_info_hash_lock);
                                goto out;
                        }
                }
                /* keep the lock time short */
                spin_unlock(&vx_info_hash_lock);
        }
out:
        return nr_xids;
}
#endif

#ifdef  CONFIG_VSERVER_DEBUG

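/*      dump_vx_info_inactive()

        * debug helper: dump every context on the inactive list */
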
void dump_vx_info_inactive(int level)
{
        struct hlist_node *entry, *next;

        hlist_for_each_safe(entry, next, &vx_info_inactive) {
                struct vx_info *vxi =
                        hlist_entry(entry, struct vx_info, vx_hlist);

                dump_vx_info(vxi, level);
        }
}

#endif

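/*      vx_migrate_user()

        * switch the task to the user_struct allocated for its
        * uid in the given context                              */
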
int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
{
        struct user_struct *new_user, *old_user;

        if (!p || !vxi)
                BUG();

        if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0))
                return -EACCES;

        new_user = alloc_uid(vxi->vx_id, p->uid);
        if (!new_user)
                return -ENOMEM;

        old_user = p->user;
        if (new_user != old_user) {
                atomic_inc(&new_user->processes);
                atomic_dec(&old_user->processes);
                p->user = new_user;
        }
        free_uid(old_user);
        return 0;
}

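/*      vx_mask_cap_bset()

        * mask the task's capability sets with the context
        * capability bounding set                               */
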
void vx_mask_cap_bset(struct vx_info *vxi, struct task_struct *p)
{
        p->cap_effective &= vxi->vx_cap_bset;
        p->cap_inheritable &= vxi->vx_cap_bset;
        p->cap_permitted &= vxi->vx_cap_bset;
}


#include <linux/file.h>

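/*      vx_openfd_task()

        * count the open file descriptors of a task by summing
        * the set bits in its open_fds bitmap                   */
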
static int vx_openfd_task(struct task_struct *tsk)
{
        struct files_struct *files = tsk->files;
        struct fdtable *fdt;
        const unsigned long *bptr;
        int count, total;

        /* no rcu_read_lock() because of spin_lock() */
        spin_lock(&files->file_lock);
        fdt = files_fdtable(files);
        bptr = fdt->open_fds->fds_bits;
        count = fdt->max_fds / (sizeof(unsigned long) * 8);
        for (total = 0; count > 0; count--) {
                if (*bptr)
                        total += hweight_long(*bptr);
                bptr++;
        }
        spin_unlock(&files->file_lock);
        return total;
}


/*      for *space compatibility */

asmlinkage long sys_unshare(unsigned long);

/*
 *      migrate task to new context
 *      gets vxi, puts old_vxi on change
 *      optionally unshares namespaces (hack)
 */

int vx_migrate_task(struct task_struct *p, struct vx_info *vxi, int unshare)
{
        struct vx_info *old_vxi;
        int ret = 0;

        if (!p || !vxi)
                BUG();

        vxdprintk(VXD_CBIT(xid, 5),
                "vx_migrate_task(%p,%p[#%d.%d])", p, vxi,
                vxi->vx_id, atomic_read(&vxi->vx_usecnt));

        if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0) &&
                !vx_info_flags(vxi, VXF_STATE_SETUP, 0))
                return -EACCES;

        if (vx_info_state(vxi, VXS_SHUTDOWN))
                return -EFAULT;

        old_vxi = task_get_vx_info(p);
        if (old_vxi == vxi)
                goto out;

        if (!(ret = vx_migrate_user(p, vxi))) {
                int openfd;

                task_lock(p);
                openfd = vx_openfd_task(p);

                if (old_vxi) {
                        atomic_dec(&old_vxi->cvirt.nr_threads);
                        atomic_dec(&old_vxi->cvirt.nr_running);
                        __rlim_dec(&old_vxi->limit, RLIMIT_NPROC);
                        /* FIXME: what about the struct files here? */
                        __rlim_sub(&old_vxi->limit, VLIMIT_OPENFD, openfd);
                        /* account for the executable */
                        __rlim_dec(&old_vxi->limit, VLIMIT_DENTRY);
                }
                atomic_inc(&vxi->cvirt.nr_threads);
                atomic_inc(&vxi->cvirt.nr_running);
                __rlim_inc(&vxi->limit, RLIMIT_NPROC);
                /* FIXME: what about the struct files here? */
                __rlim_add(&vxi->limit, VLIMIT_OPENFD, openfd);
                /* account for the executable */
                __rlim_inc(&vxi->limit, VLIMIT_DENTRY);

                if (old_vxi) {
                        release_vx_info(old_vxi, p);
                        clr_vx_info(&p->vx_info);
                }
                claim_vx_info(vxi, p);
                set_vx_info(&p->vx_info, vxi);
                p->xid = vxi->vx_id;

                vxdprintk(VXD_CBIT(xid, 5),
                        "moved task %p into vxi:%p[#%d]",
                        p, vxi, vxi->vx_id);

                vx_mask_cap_bset(vxi, p);
                task_unlock(p);

                /* hack for *spaces to provide compatibility */
                if (unshare) {
                        ret = sys_unshare(CLONE_NEWUTS|CLONE_NEWIPC);
                        vx_set_space(vxi, CLONE_NEWUTS|CLONE_NEWIPC);
                }
        }
out:
        put_vx_info(old_vxi);
        return ret;
}

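/*      vx_set_reaper()

        * make p the child reaper for the context, replacing
        * (and putting) the previous reaper                     */
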
int vx_set_reaper(struct vx_info *vxi, struct task_struct *p)
{
        struct task_struct *old_reaper;

        if (!vxi)
                return -EINVAL;

        vxdprintk(VXD_CBIT(xid, 6),
                "vx_set_reaper(%p[#%d],%p[#%d,%d])",
                vxi, vxi->vx_id, p, p->xid, p->pid);

        old_reaper = vxi->vx_reaper;
        if (old_reaper == p)
                return 0;

        /* set new child reaper */
        get_task_struct(p);
        vxi->vx_reaper = p;
        put_task_struct(old_reaper);
        return 0;
}

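/*      vx_set_init()

        * record p as the init process of the context and
        * clear the one time INIT state flag                    */
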
int vx_set_init(struct vx_info *vxi, struct task_struct *p)
{
        if (!vxi)
                return -EINVAL;

        vxdprintk(VXD_CBIT(xid, 6),
                "vx_set_init(%p[#%d],%p[#%d,%d,%d])",
                vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);

        vxi->vx_flags &= ~VXF_STATE_INIT;
        vxi->vx_initpid = p->tgid;
        return 0;
}

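/*      vx_exit_init()

        * context init has exited: store the exit code and
        * forget the init pid                                   */
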
void vx_exit_init(struct vx_info *vxi, struct task_struct *p, int code)
{
        vxdprintk(VXD_CBIT(xid, 6),
                "vx_exit_init(%p[#%d],%p[#%d,%d,%d])",
                vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);

        vxi->exit_code = code;
        vxi->vx_initpid = 0;
}


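/*      context persistence

        * persistent contexts hold an extra usage reference
        * and claim, so they survive without any task
        * vx_update_persistent() applies the VXF_PERSISTENT
        * flag setting                                          */
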
void vx_set_persistent(struct vx_info *vxi)
{
        vxdprintk(VXD_CBIT(xid, 6),
                "vx_set_persistent(%p[#%d])", vxi, vxi->vx_id);

        get_vx_info(vxi);
        claim_vx_info(vxi, NULL);
}

void vx_clear_persistent(struct vx_info *vxi)
{
        vxdprintk(VXD_CBIT(xid, 6),
                "vx_clear_persistent(%p[#%d])", vxi, vxi->vx_id);

        release_vx_info(vxi, NULL);
        put_vx_info(vxi);
}

void vx_update_persistent(struct vx_info *vxi)
{
        if (vx_info_flags(vxi, VXF_PERSISTENT, 0))
                vx_set_persistent(vxi);
        else
                vx_clear_persistent(vxi);
}


/*      task must be current or locked          */

void exit_vx_info(struct task_struct *p, int code)
{
        struct vx_info *vxi = p->vx_info;

        if (vxi) {
                atomic_dec(&vxi->cvirt.nr_threads);
                vx_nproc_dec(p);

                vxi->exit_code = code;
                release_vx_info(vxi, p);
        }
}

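/*      exit_vx_info_early()

        * early exit processing: hand over init and reaper
        * duties while the task still exists                    */
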
void exit_vx_info_early(struct task_struct *p, int code)
{
        struct vx_info *vxi = p->vx_info;

        if (vxi) {
                if (vxi->vx_initpid == p->tgid)
                        vx_exit_init(vxi, p, code);
                if (vxi->vx_reaper == p)
                        vx_set_reaper(vxi, init_pid_ns.child_reaper);
        }
}


/* vserver syscall commands below here */

/* task xid and vx_info functions */

#include <asm/uaccess.h>


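/*      vc_task_xid()

        * return the xid of the task with the given pid, or
        * of current when id is zero                            */
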
int vc_task_xid(uint32_t id, void __user *data)
{
        xid_t xid;

        if (id) {
                struct task_struct *tsk;

                if (!vx_check(0, VS_ADMIN|VS_WATCH))
                        return -EPERM;

                read_lock(&tasklist_lock);
                tsk = find_task_by_real_pid(id);
                xid = (tsk) ? tsk->xid : -ESRCH;
                read_unlock(&tasklist_lock);
        } else
                xid = vx_current_xid();
        return xid;
}


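/*      vc_vx_info()

        * copy basic context info (xid, init pid) to userspace  */
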
int vc_vx_info(struct vx_info *vxi, void __user *data)
{
        struct vcmd_vx_info_v0 vc_data;

        vc_data.xid = vxi->vx_id;
        vc_data.initpid = vxi->vx_initpid;

        if (copy_to_user(data, &vc_data, sizeof(vc_data)))
                return -EFAULT;
        return 0;
}


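/*      vc_ctx_stat()

        * copy the context usage counters to userspace          */
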
int vc_ctx_stat(struct vx_info *vxi, void __user *data)
{
        struct vcmd_ctx_stat_v0 vc_data;

        vc_data.usecnt = atomic_read(&vxi->vx_usecnt);
        vc_data.tasks = atomic_read(&vxi->vx_tasks);

        if (copy_to_user(data, &vc_data, sizeof(vc_data)))
                return -EFAULT;
        return 0;
}


/* context functions */

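/*      vc_ctx_create()

        * create a new context (xid 2 up to MAX_S_CONTEXT, or
        * VX_DYNAMIC_ID) and migrate the current task into it
        * returns the new xid on success                        */
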
int vc_ctx_create(uint32_t xid, void __user *data)
{
        struct vcmd_ctx_create vc_data = { .flagword = VXF_INIT_SET };
        struct vx_info *new_vxi;
        int ret;

        if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        if ((xid > MAX_S_CONTEXT) && (xid != VX_DYNAMIC_ID))
                return -EINVAL;
        if (xid < 2)
                return -EINVAL;

        new_vxi = __create_vx_info(xid);
        if (IS_ERR(new_vxi))
                return PTR_ERR(new_vxi);

        /* initial flags */
        new_vxi->vx_flags = vc_data.flagword;

        ret = -ENOEXEC;
        if (vs_state_change(new_vxi, VSC_STARTUP))
                goto out;

        ret = vx_migrate_task(current, new_vxi, (!data));
        if (ret)
                goto out;

        /* return context id on success */
        ret = new_vxi->vx_id;

        /* get a reference for persistent contexts */
        if (vc_data.flagword & VXF_PERSISTENT)
                vx_set_persistent(new_vxi);
out:
        release_vx_info(new_vxi, NULL);
        put_vx_info(new_vxi);
        return ret;
}


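/*      vc_ctx_migrate()

        * move the current task into an existing context,
        * optionally taking over init and reaper duties         */
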
int vc_ctx_migrate(struct vx_info *vxi, void __user *data)
{
        struct vcmd_ctx_migrate vc_data = { .flagword = 0 };
        int ret;

        if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        ret = vx_migrate_task(current, vxi, 0);
        if (ret)
                return ret;
        if (vc_data.flagword & VXM_SET_INIT)
                ret = vx_set_init(vxi, current);
        if (ret)
                return ret;
        if (vc_data.flagword & VXM_SET_REAPER)
                ret = vx_set_reaper(vxi, current);
        return ret;
}


int vc_get_cflags(struct vx_info *vxi, void __user *data)
{
        struct vcmd_ctx_flags_v0 vc_data;

        vc_data.flagword = vxi->vx_flags;

        /* special STATE flag handling */
        vc_data.mask = vs_mask_flags(~0UL, vxi->vx_flags, VXF_ONE_TIME);

        if (copy_to_user(data, &vc_data, sizeof(vc_data)))
                return -EFAULT;
        return 0;
}

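/*      vc_set_cflags()

        * update the context flags under the given mask
        * trigger holds the maskable bits that actually change,
        * which drives the one time SETUP/INIT transitions      */
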
int vc_set_cflags(struct vx_info *vxi, void __user *data)
{
        struct vcmd_ctx_flags_v0 vc_data;
        uint64_t mask, trigger;

        if (copy_from_user(&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        /* special STATE flag handling */
        mask = vs_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
        trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword);

        if (vxi == current->vx_info) {
                if (trigger & VXF_STATE_SETUP)
                        vx_mask_cap_bset(vxi, current);
                if (trigger & VXF_STATE_INIT) {
                        int ret;

                        ret = vx_set_init(vxi, current);
                        if (ret)
                                return ret;
                        ret = vx_set_reaper(vxi, current);
                        if (ret)
                                return ret;
                }
        }

        vxi->vx_flags = vs_mask_flags(vxi->vx_flags,
                vc_data.flagword, mask);
        if (trigger & VXF_PERSISTENT)
                vx_update_persistent(vxi);

        return 0;
}

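/*      context capability handling

        * bcaps are the basic (system) capabilities, ccaps the
        * vserver specific context capabilities; both are read
        * and updated through a mask                            */
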
static int do_get_caps(struct vx_info *vxi, uint64_t *bcaps, uint64_t *ccaps)
{
        if (bcaps)
                *bcaps = vxi->vx_bcaps;
        if (ccaps)
                *ccaps = vxi->vx_ccaps;

        return 0;
}

int vc_get_ccaps_v0(struct vx_info *vxi, void __user *data)
{
        struct vcmd_ctx_caps_v0 vc_data;
        int ret;

        ret = do_get_caps(vxi, &vc_data.bcaps, &vc_data.ccaps);
        if (ret)
                return ret;
        vc_data.cmask = ~0UL;

        if (copy_to_user(data, &vc_data, sizeof(vc_data)))
                return -EFAULT;
        return 0;
}

int vc_get_ccaps(struct vx_info *vxi, void __user *data)
{
        struct vcmd_ctx_caps_v1 vc_data;
        int ret;

        ret = do_get_caps(vxi, NULL, &vc_data.ccaps);
        if (ret)
                return ret;
        vc_data.cmask = ~0UL;

        if (copy_to_user(data, &vc_data, sizeof(vc_data)))
                return -EFAULT;
        return 0;
}

static int do_set_caps(struct vx_info *vxi,
        uint64_t bcaps, uint64_t bmask, uint64_t ccaps, uint64_t cmask)
{
        vxi->vx_bcaps = vs_mask_flags(vxi->vx_bcaps, bcaps, bmask);
        vxi->vx_ccaps = vs_mask_flags(vxi->vx_ccaps, ccaps, cmask);

        return 0;
}

int vc_set_ccaps_v0(struct vx_info *vxi, void __user *data)
{
        struct vcmd_ctx_caps_v0 vc_data;

        if (copy_from_user(&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        /* simulate old &= behaviour for bcaps */
        return do_set_caps(vxi, 0, ~vc_data.bcaps,
                vc_data.ccaps, vc_data.cmask);
}

int vc_set_ccaps(struct vx_info *vxi, void __user *data)
{
        struct vcmd_ctx_caps_v1 vc_data;

        if (copy_from_user(&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        return do_set_caps(vxi, 0, 0, vc_data.ccaps, vc_data.cmask);
}

int vc_get_bcaps(struct vx_info *vxi, void __user *data)
{
        struct vcmd_bcaps vc_data;
        int ret;

        ret = do_get_caps(vxi, &vc_data.bcaps, NULL);
        if (ret)
                return ret;
        vc_data.bmask = ~0UL;

        if (copy_to_user(data, &vc_data, sizeof(vc_data)))
                return -EFAULT;
        return 0;
}

int vc_set_bcaps(struct vx_info *vxi, void __user *data)
{
        struct vcmd_bcaps vc_data;

        if (copy_from_user(&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        return do_set_caps(vxi, vc_data.bcaps, vc_data.bmask, 0, 0);
}

#include <linux/module.h>

EXPORT_SYMBOL_GPL(free_vx_info);
