2 * linux/kernel/vserver/context.c
4 * Virtual Server: Context Support
6 * Copyright (C) 2003-2004 Herbert Pötzl
9 * V0.02 vx_ctx_kill syscall command
10 * V0.03 replaced context_info calls
11 * V0.04 redesign of struct (de)alloc
12 * V0.05 rlimit basic implementation
13 * V0.06 task_xid and info commands
14 * V0.07 context flags and caps
18 #include <linux/config.h>
19 #include <linux/slab.h>
20 #include <linux/vserver/context.h>
21 #include <linux/vserver/legacy.h>
22 #include <linux/vinline.h>
23 #include <linux/kernel_stat.h>
24 #include <linux/namespace.h>
26 #include <asm/errno.h>
29 /* system functions */
/* Protects the global list of contexts (vx_infos) walked by
 * __find_vx_info(); cacheline-aligned to avoid false sharing on SMP. */
34 spinlock_t vxlist_lock
35 __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
39 * struct vx_info allocation and deallocation
/*
 * alloc_vx_info - allocate and initialise a fresh context struct for @id
 *
 * kmalloc()s and zeroes a struct vx_info, then runs the per-subsystem
 * init hooks (limit, sched, cvirt, cacct).  A new context starts in
 * SETUP|INIT state with the init effective capability set as its
 * capability barrier.
 *
 * NOTE(review): this listing is incomplete -- the allocation-failure
 * check and the return statement are not visible here; presumably
 * returns NULL when kmalloc() fails and @new otherwise -- confirm
 * against the full source.
 */
42 static struct vx_info *alloc_vx_info(int id)
44 struct vx_info *new = NULL;
46 vxdprintk("alloc_vx_info(%d)\n", id);
47 /* would this benefit from a slab cache? */
48 new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
52 memset (new, 0, sizeof(struct vx_info));
/* not yet on the global list; the caller links it under vxlist_lock */
54 INIT_LIST_HEAD(&new->vx_list);
55 /* rest of init goes here */
57 vx_info_init_limit(&new->limit);
58 vx_info_init_sched(&new->sched);
59 vx_info_init_cvirt(&new->cvirt);
60 vx_info_init_cacct(&new->cacct);
/* SETUP+INIT are one-time state flags, cleared later via vc_set_cflags() */
62 new->vx_flags = VXF_STATE_SETUP|VXF_STATE_INIT;
63 new->vx_bcaps = CAP_INIT_EFF_SET;
66 vxdprintk("alloc_vx_info(%d) = %p\n", id, new);
/*
 * free_vx_info - release a context struct once its refcount is gone
 *
 * Drops the namespace and fs_struct references held by the context,
 * runs the per-subsystem exit hooks, and BUG()s if anyone still holds
 * a reference.
 *
 * NOTE(review): the namespace put is guarded by a NULL check but the
 * put_fs_struct() call is not visibly guarded here -- the missing
 * line 75 presumably holds an `if (vxi->vx_fs)` check; confirm
 * against the full source.  The kfree() of @vxi is also not visible
 * in this listing.
 */
70 void free_vx_info(struct vx_info *vxi)
72 vxdprintk("free_vx_info(%p)\n", vxi);
73 if (vxi->vx_namespace)
74 put_namespace(vxi->vx_namespace);
76 put_fs_struct(vxi->vx_fs);
78 vx_info_exit_limit(&vxi->limit);
79 vx_info_exit_sched(&vxi->sched);
80 vx_info_exit_cvirt(&vxi->cvirt);
81 vx_info_exit_cacct(&vxi->cacct);
/* freeing a context that is still referenced is a fatal logic error */
83 BUG_ON(atomic_read(&vxi->vx_refcount));
91 * struct vx_info search by id
92 * assumes vxlist_lock is held
/*
 * __find_vx_info - linear search of the global context list for @id
 *
 * Caller must hold vxlist_lock (see comment above).  Returns the
 * matching vx_info, or -- presumably, the return path is not visible
 * in this listing -- NULL when no context with @id exists.
 */
95 static __inline__ struct vx_info *__find_vx_info(int id)
99 list_for_each_entry(vxi, &vx_infos, vx_list)
100 if (vxi->vx_id == id)
107 * struct vx_info ref stuff
/*
 * find_vx_info - look up a context by id, taking a reference
 *
 * Starts from current's own vx_info, then searches the global list
 * under vxlist_lock.  NOTE(review): the reference-taking (get_vx_info
 * or refcount increment) and the return path fall in lines not
 * visible in this listing -- callers such as vc_vx_info() do put the
 * result, so a reference is presumably taken here; confirm.
 */
110 struct vx_info *find_vx_info(int id)
115 vxi = current->vx_info;
118 spin_lock(&vxlist_lock);
119 if ((vxi = __find_vx_info(id)))
121 spin_unlock(&vxlist_lock);
127 * verify that id is a valid xid
/*
 * vx_info_id_valid - check whether @id names an existing context
 *
 * Pure existence test under vxlist_lock; takes no reference on the
 * context, so the answer may be stale as soon as the lock is dropped.
 */
130 int vx_info_id_valid(int id)
134 spin_lock(&vxlist_lock);
135 valid = (__find_vx_info(id) != NULL);
136 spin_unlock(&vxlist_lock);
142 * dynamic context id ...
/*
 * __vx_dynamic_id - pick the next free dynamic context id
 *
 * Scans upward from a static sequence counter, wrapping when it
 * exceeds MAX_S_CONTEXT, until an unused id is found or the scan
 * comes back around to its starting point (@barrier).  Relies on
 * __find_vx_info(), so the caller must hold vxlist_lock; the static
 * @seq counter is likewise only safe under that lock.
 * NOTE(review): the wrap-around assignment and the failure return are
 * in lines not visible in this listing.
 */
145 static __inline__ xid_t __vx_dynamic_id(void)
147 static xid_t seq = MAX_S_CONTEXT;
151 if (++seq > MAX_S_CONTEXT)
153 if (!__find_vx_info(seq))
155 } while (barrier != seq);
/*
 * __foc_vx_info - find-or-create a context for @id
 * @id:  requested context id, or VX_DYNAMIC_ID for "pick one"
 * @err: out-parameter for the error code on failure
 *
 * Allocates a candidate vx_info up front (outside the lock), then
 * under vxlist_lock either resolves a dynamic id, returns an existing
 * context, or links the candidate into the global list with an
 * initial refcount of 1.  When an existing context is returned the
 * pre-allocated candidate is discarded (the kfree of @new falls in
 * lines not visible in this listing -- confirm it is freed on every
 * such path).
 */
159 static struct vx_info * __foc_vx_info(int id, int *err)
161 struct vx_info *new, *vxi = NULL;
163 vxdprintk("foc_vx_info(%d)\n", id);
/* allocate before taking the spinlock: kmalloc(GFP_KERNEL) may sleep */
164 if (!(new = alloc_vx_info(id))) {
169 /* dirty hack until Spectator becomes a cap */
170 if (id == 0 || id == 1) {
175 spin_lock(&vxlist_lock);
177 /* dynamic context requested */
178 if (id == VX_DYNAMIC_ID) {
179 id = __vx_dynamic_id();
/* __vx_dynamic_id() exhausted the dynamic range */
181 printk(KERN_ERR "no dynamic context available.\n");
186 /* existing context requested */
187 else if ((vxi = __find_vx_info(id))) {
188 /* context in setup is not available */
189 if (vxi->vx_flags & VXF_STATE_SETUP) {
190 vxdprintk("foc_vx_info(%d) = %p (not available)\n", id, vxi);
194 vxdprintk("foc_vx_info(%d) = %p (found)\n", id, vxi);
201 /* new context requested */
202 vxdprintk("foc_vx_info(%d) = %p (new)\n", id, new);
203 atomic_set(&new->vx_refcount, 1);
204 list_add(&new->vx_list, &vx_infos);
/* hand ownership of the candidate to the list; NULL marks it consumed */
205 vxi = new, new = NULL;
209 spin_unlock(&vxlist_lock);
/*
 * find_or_create_vx_info - public wrapper around __foc_vx_info()
 *
 * Same semantics as __foc_vx_info() but discards the detailed error
 * code (@err is local and not returned to the caller).
 */
216 struct vx_info *find_or_create_vx_info(int id)
220 return __foc_vx_info(id, &err);
/*
 * vx_migrate_user - move task @p's user accounting into context @vxi
 *
 * Allocates (or finds) the per-context user_struct for (@vxi->vx_id,
 * p->uid) and shifts the process count from the old user_struct to
 * the new one when they differ.  NOTE(review): the alloc_uid()
 * failure path, @old_user initialisation, and switch_uid/free_uid
 * handling are in lines not visible in this listing.
 */
224 int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
226 struct user_struct *new_user, *old_user;
230 new_user = alloc_uid(vxi->vx_id, p->uid);
235 if (new_user != old_user) {
236 atomic_inc(&new_user->processes);
237 atomic_dec(&old_user->processes);
/*
 * vx_mask_bcaps - clamp @p's capability sets to its context barrier
 *
 * Ands the task's effective, inheritable and permitted capability
 * sets with the context's vx_bcaps, so the task can never hold a
 * capability its context does not allow.  Assumes p->vx_info is
 * non-NULL -- callers invoke this only for tasks inside a context.
 */
244 void vx_mask_bcaps(struct task_struct *p)
246 struct vx_info *vxi = p->vx_info;
248 p->cap_effective &= vxi->vx_bcaps;
249 p->cap_inheritable &= vxi->vx_bcaps;
250 p->cap_permitted &= vxi->vx_bcaps;
254 #include <linux/file.h>
/*
 * vx_nofiles_task - count fd-table bits set for @tsk
 *
 * Under files->file_lock, walks the open_fds and close_on_exec
 * bitmaps word by word and sums the set bits with hweight_long().
 * Used by vx_migrate_task() to charge the context's RLIMIT_NOFILE
 * accounting.  NOTE(review): counting close_on_exec bits in addition
 * to open_fds looks intentional for this accounting scheme but is
 * worth confirming -- a fd can be counted twice if it is both open
 * and close-on-exec.  The bitmap-pointer increments (lines 267/270 of
 * the original) are not visible in this listing.
 */
256 static inline int vx_nofiles_task(struct task_struct *tsk)
258 struct files_struct *files = tsk->files;
259 const unsigned long *obptr, *cbptr;
262 spin_lock(&files->file_lock);
263 obptr = files->open_fds->fds_bits;
264 cbptr = files->close_on_exec->fds_bits;
/* number of whole bitmap words: max_fds / bits-per-long */
265 count = files->max_fds / (sizeof(unsigned long) * 8);
266 for (total = 0; count > 0; count--) {
268 total += hweight_long(*obptr);
271 total += hweight_long(*cbptr);
274 spin_unlock(&files->file_lock);
/*
 * vx_openfd_task - count open file descriptors of @tsk
 *
 * Same word-wise popcount walk as vx_nofiles_task(), but over the
 * open_fds bitmap only.  Used by vx_migrate_task() to charge the
 * context's RLIMIT_OPENFD accounting.  The bitmap-pointer increment
 * and the return statement fall in lines not visible in this listing.
 */
278 static inline int vx_openfd_task(struct task_struct *tsk)
280 struct files_struct *files = tsk->files;
281 const unsigned long *bptr;
284 spin_lock(&files->file_lock);
285 bptr = files->open_fds->fds_bits;
286 count = files->max_fds / (sizeof(unsigned long) * 8);
287 for (total = 0; count > 0; count--) {
289 total += hweight_long(*bptr);
292 spin_unlock(&files->file_lock);
297 * migrate task to new context
298 * gets vxi, puts old_vxi on change
/*
 * vx_migrate_task - move task @p into context @vxi
 *
 * Takes a reference on the old context, migrates the user accounting
 * via vx_migrate_user(), and on success shifts the thread, process
 * and fd counters from the old context to the new one before
 * installing @vxi as p->vx_info via set_vx_info().  The old context
 * reference is dropped on both the success and failure paths.
 *
 * NOTE(review): the vxdprintk format string "vx_migrate_task(%p,%p[#%d.%d)"
 * has a mismatched '[' vs ')' -- cosmetic, debug output only, but
 * worth fixing in a code change.
 */
301 int vx_migrate_task(struct task_struct *p, struct vx_info *vxi)
303 struct vx_info *old_vxi = task_get_vx_info(p);
309 vxdprintk("vx_migrate_task(%p,%p[#%d.%d)\n", p, vxi,
310 vxi->vx_id, atomic_read(&vxi->vx_refcount));
314 if (!(ret = vx_migrate_user(p, vxi))) {
/* de-account the task from the context it is leaving */
317 atomic_dec(&old_vxi->cacct.nr_threads);
318 atomic_dec(&old_vxi->limit.res[RLIMIT_NPROC]);
/* ... and account it against the context it is joining */
320 atomic_inc(&vxi->cacct.nr_threads);
321 atomic_inc(&vxi->limit.res[RLIMIT_NPROC]);
322 atomic_add(vx_nofiles_task(p), &vxi->limit.res[RLIMIT_NOFILE]);
323 atomic_add(vx_openfd_task(p), &vxi->limit.res[RLIMIT_OPENFD]);
324 set_vx_info(&p->vx_info, vxi);
329 put_vx_info(old_vxi);
332 put_vx_info(old_vxi);
/*
 * vx_set_init - record task @p as the init process of context @vxi
 *
 * Stores p->tgid as the context's vx_initpid.  Validation and any
 * additional per-task setup fall in lines not visible in this
 * listing.
 */
336 int vx_set_init(struct vx_info *vxi, struct task_struct *p)
343 vxi->vx_initpid = p->tgid;
348 /* vserver syscall commands below here */
350 /* task xid and vx_info functions */
352 #include <asm/uaccess.h>
/*
 * vc_task_xid - vserver syscall: report the context id of a task
 * @id:   pid of the task to query
 * @data: unused here (no copy_to_user visible in this listing)
 *
 * Only the admin/watch contexts may query (vx_check).  Looks the task
 * up under tasklist_lock and reads its xid; -ESRCH when no such pid.
 * The return path for @xid falls in lines not visible here.
 */
355 int vc_task_xid(uint32_t id, void __user *data)
360 struct task_struct *tsk;
362 if (!vx_check(0, VX_ADMIN|VX_WATCH))
365 read_lock(&tasklist_lock);
366 tsk = find_task_by_pid(id);
367 xid = (tsk) ? tsk->xid : -ESRCH;
368 read_unlock(&tasklist_lock);
/*
 * vc_vx_info - vserver syscall: copy basic context info to userspace
 *
 * Requires the admin context plus both CAP_SYS_ADMIN and
 * CAP_SYS_RESOURCE.  Fills a vcmd_vx_info_v0 with the context's xid
 * and initpid and copies it to @data.  The find_vx_info() failure
 * check and the put of the returned reference fall in lines not
 * visible in this listing.
 */
376 int vc_vx_info(uint32_t id, void __user *data)
379 struct vcmd_vx_info_v0 vc_data;
381 if (!vx_check(0, VX_ADMIN))
383 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
386 vxi = find_vx_info(id);
390 vc_data.xid = vxi->vx_id;
391 vc_data.initpid = vxi->vx_initpid;
394 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
400 /* context functions */
/*
 * vc_ctx_create - vserver syscall: create (or select) a context and
 * migrate the calling task into it
 *
 * Rejects ids in the dynamic range unless VX_DYNAMIC_ID was passed
 * explicitly.  Uses __foc_vx_info() to find-or-create; a context that
 * is no longer in SETUP state is treated as an error (an existing,
 * finished context cannot be "created" again).  On success returns
 * the (possibly dynamically assigned) context id after migrating
 * current into it.  NOTE(review): vx_migrate_task()'s return value is
 * ignored on line 426 -- the syscall reports success even if the
 * migration failed; worth confirming/fixing in a code change.
 */
402 int vc_ctx_create(uint32_t xid, void __user *data)
404 // int ret = -ENOMEM;
405 struct vx_info *new_vxi;
408 if (!capable(CAP_SYS_ADMIN))
411 if ((xid >= MIN_D_CONTEXT) && (xid != VX_DYNAMIC_ID))
417 new_vxi = __foc_vx_info(xid, &ret);
420 if (!(new_vxi->vx_flags & VXF_STATE_SETUP)) {
425 ret = new_vxi->vx_id;
426 vx_migrate_task(current, new_vxi);
428 put_vx_info(new_vxi);
/*
 * vc_ctx_migrate - vserver syscall: move the calling task into an
 * existing context @id
 *
 * CAP_SYS_ADMIN only.  Looks the context up with find_vx_info() and
 * migrates current into it.  The lookup-failure check, the put of the
 * reference, and the special-casing behind the "dirty hack" comment
 * (line 440) fall in lines not visible in this listing.  As in
 * vc_ctx_create(), vx_migrate_task()'s return value is ignored.
 */
433 int vc_ctx_migrate(uint32_t id, void __user *data)
437 if (!capable(CAP_SYS_ADMIN))
440 /* dirty hack until Spectator becomes a cap */
446 vxi = find_vx_info(id);
449 vx_migrate_task(current, vxi);
/*
 * vc_get_cflags - vserver syscall: read a context's flag word
 *
 * CAP_SYS_ADMIN only.  Copies the context's vx_flags to userspace
 * together with a mask; the one-time VXF_ONE_TIME state flags are
 * masked out of the reported mask via vx_mask_flags() so userspace
 * does not treat them as settable.  The find_vx_info() failure check
 * and reference put fall in lines not visible in this listing.
 */
455 int vc_get_cflags(uint32_t id, void __user *data)
458 struct vcmd_ctx_flags_v0 vc_data;
460 if (!capable(CAP_SYS_ADMIN))
463 vxi = find_vx_info(id);
467 vc_data.flagword = vxi->vx_flags;
469 // vc_data.mask = ~0UL;
470 /* special STATE flag handling */
471 vc_data.mask = vx_mask_flags(~0UL, vxi->vx_flags, VXF_ONE_TIME);
475 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
/*
 * vc_set_cflags - vserver syscall: update a context's flag word
 *
 * CAP_SYS_ADMIN only.  Reads a vcmd_ctx_flags_v0 from userspace,
 * restricts the caller-supplied mask for the one-time VXF_ONE_TIME
 * state flags, and computes @trigger -- the set of masked flags that
 * actually change.  Clearing VXF_STATE_SETUP applies the capability
 * barrier to current (vx_mask_bcaps); toggling VXF_STATE_INIT records
 * current as the context's init task, but only when the caller is
 * operating on its own context.  Finally the flag word is merged via
 * vx_mask_flags().  The lookup-failure check and reference put fall
 * in lines not visible in this listing.
 */
480 int vc_set_cflags(uint32_t id, void __user *data)
483 struct vcmd_ctx_flags_v0 vc_data;
484 uint64_t mask, trigger;
486 if (!capable(CAP_SYS_ADMIN))
488 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
491 vxi = find_vx_info(id);
495 /* special STATE flag handling */
496 mask = vx_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
/* bits that are both settable and actually changing value */
497 trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword);
499 if (trigger & VXF_STATE_SETUP)
500 vx_mask_bcaps(current);
501 if (trigger & VXF_STATE_INIT)
502 if (vxi == current->vx_info)
503 vx_set_init(vxi, current);
505 vxi->vx_flags = vx_mask_flags(vxi->vx_flags,
506 vc_data.flagword, mask);
/*
 * vc_get_ccaps - vserver syscall: read a context's capability sets
 *
 * CAP_SYS_ADMIN only.  Copies the context's barrier caps (bcaps) and
 * context caps (ccaps) to userspace; cmask reports all bits as
 * relevant.  The lookup-failure check and reference put fall in lines
 * not visible in this listing.
 */
511 int vc_get_ccaps(uint32_t id, void __user *data)
514 struct vcmd_ctx_caps_v0 vc_data;
516 if (!capable(CAP_SYS_ADMIN))
519 vxi = find_vx_info(id);
523 vc_data.bcaps = vxi->vx_bcaps;
524 vc_data.ccaps = vxi->vx_ccaps;
525 vc_data.cmask = ~0UL;
528 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
/*
 * vc_set_ccaps - vserver syscall: update a context's capability sets
 *
 * CAP_SYS_ADMIN only.  bcaps may only ever be reduced (&=), so a
 * context's capability barrier can never be widened after creation;
 * ccaps are merged under the caller-supplied cmask via
 * vx_mask_flags().  The copy_from_user failure return, lookup-failure
 * check and reference put fall in lines not visible in this listing.
 */
533 int vc_set_ccaps(uint32_t id, void __user *data)
536 struct vcmd_ctx_caps_v0 vc_data;
538 if (!capable(CAP_SYS_ADMIN))
540 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
543 vxi = find_vx_info(id);
547 vxi->vx_bcaps &= vc_data.bcaps;
548 vxi->vx_ccaps = vx_mask_flags(vxi->vx_ccaps,
549 vc_data.ccaps, vc_data.cmask);
554 #include <linux/module.h>
556 EXPORT_SYMBOL_GPL(free_vx_info);
557 EXPORT_SYMBOL_GPL(vxlist_lock);