fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / kernel / vserver / switch.c
1 /*
2  *  linux/kernel/vserver/switch.c
3  *
4  *  Virtual Server: Syscall Switch
5  *
6  *  Copyright (C) 2003-2007  Herbert Pötzl
7  *
8  *  V0.01  syscall switch
9  *  V0.02  added signal to context
10  *  V0.03  added rlimit functions
11  *  V0.04  added iattr, task/xid functions
12  *  V0.05  added debug/history stuff
13  *  V0.06  added compat32 layer
14  *  V0.07  vcmd args and perms
15  *  V0.08  added status commands
16  *
17  */
18
19 #include <linux/linkage.h>
20 #include <linux/sched.h>
21 #include <linux/compat.h>
22 #include <asm/errno.h>
23
24 #include <linux/vs_context.h>
25 #include <linux/vs_network.h>
26 #include <linux/vserver/switch.h>
27
28 #include "vci_config.h"
29
30 static inline
31 int vc_get_version(uint32_t id)
32 {
33 #ifdef  CONFIG_VSERVER_LEGACY_VERSION
34         if (id == 63)
35                 return VCI_LEGACY_VERSION;
36 #endif
37         return VCI_VERSION;
38 }
39
/*
 * vc_get_vci - value handed back for VCMD_get_vci.
 *
 * Simply reports whatever vci_kernel_config() returns; the id argument
 * is accepted for a uniform handler signature but is not used.
 */
static inline
int vc_get_vci(uint32_t id)
{
	int vci = vci_kernel_config();

	return vci;
}
45
46 #include <linux/vserver/context_cmd.h>
47 #include <linux/vserver/cvirt_cmd.h>
48 #include <linux/vserver/cacct_cmd.h>
49 #include <linux/vserver/limit_cmd.h>
50 #include <linux/vserver/network_cmd.h>
51 #include <linux/vserver/sched_cmd.h>
52 #include <linux/vserver/debug_cmd.h>
53 #include <linux/vserver/inode_cmd.h>
54 #include <linux/vserver/dlimit_cmd.h>
55 #include <linux/vserver/signal_cmd.h>
56 #include <linux/vserver/space_cmd.h>
57
58 #include <linux/vserver/legacy.h>
59 #include <linux/vserver/inode.h>
60 #include <linux/vserver/dlimit.h>
61
62
/*
 * __COMPAT(name, id, data, compat) - call a vcmd handler, optionally in
 * its 32bit compat flavour.
 *
 * With CONFIG_COMPAT the call goes to name##_x32(id, data) when compat is
 * non-zero and to name(id, data) otherwise; without CONFIG_COMPAT the
 * compat argument is ignored and name(id, data) is always called.
 *
 * The whole expansion is parenthesized so the macro behaves as a single
 * primary expression wherever it is used (the conditional operator has
 * very low precedence and would otherwise absorb neighbouring operands).
 */
#ifdef	CONFIG_COMPAT
#define __COMPAT(name, id, data, compat)	\
	((compat) ? name ## _x32 (id, data) : name (id, data))
#else
#define __COMPAT(name, id, data, compat)	\
	(name (id, data))
#endif
70
71
/*
 * do_vcmd - hand a vserver command to the vc_*() handler that implements it.
 *
 * @cmd:    command word, matched against the VCMD_* constants
 * @id:     passed to the handlers that take an id (e.g. vc_task_xid)
 * @vxi:    passed to the handlers that take a struct vx_info pointer
 * @nxi:    passed to the handlers that take a struct nx_info pointer
 * @data:   user space pointer, passed to the handler as is; some versioned
 *          commands (the *_v0 variants below) pass NULL instead
 * @compat: non-zero selects the _x32 flavour of a handler wherever the
 *          call is wrapped in __COMPAT() and CONFIG_COMPAT is set
 *
 * Returns whatever the selected handler returns.  A command without a
 * case below is reported through vxwprintk() and answered with -ENOSYS.
 *
 * This function performs no permission checks itself; the caller visible
 * in this file, do_vserver(), does them before calling.
 */
72 static inline
73 long do_vcmd(uint32_t cmd, uint32_t id,
74         struct vx_info *vxi, struct nx_info *nxi,
75         void __user *data, int compat)
76 {
77         switch (cmd) {
78
79         case VCMD_get_version:
80                 return vc_get_version(id);
81         case VCMD_get_vci:
82                 return vc_get_vci(id);
83
84         case VCMD_task_xid:
85                 return vc_task_xid(id, data);
86         case VCMD_vx_info:
87                 return vc_vx_info(vxi, data);
88
89         case VCMD_task_nid:
90                 return vc_task_nid(id, data);
91         case VCMD_nx_info:
92                 return vc_nx_info(nxi, data);
93
           /* v0 deliberately falls through: both versions share one handler */
94         case VCMD_set_space_v0:
95         /* this is version 1 */
96         case VCMD_set_space:
97                 return vc_set_space(vxi, data);
98
99         case VCMD_get_space_mask:
100                 return vc_get_space_mask(vxi, data);
101
            /*
             * The rlimit handlers go through __COMPAT() only when
             * CONFIG_IA32_EMULATION is set; otherwise the plain handlers
             * are called regardless of the compat argument.
             */
102 #ifdef  CONFIG_IA32_EMULATION
103         case VCMD_get_rlimit:
104                 return __COMPAT(vc_get_rlimit, vxi, data, compat);
105         case VCMD_set_rlimit:
106                 return __COMPAT(vc_set_rlimit, vxi, data, compat);
107 #else
108         case VCMD_get_rlimit:
109                 return vc_get_rlimit(vxi, data);
110         case VCMD_set_rlimit:
111                 return vc_set_rlimit(vxi, data);
112 #endif
113         case VCMD_get_rlimit_mask:
114                 return vc_get_rlimit_mask(id, data);
115         case VCMD_reset_minmax:
116                 return vc_reset_minmax(vxi, data);
117
118         case VCMD_get_vhi_name:
119                 return vc_get_vhi_name(vxi, data);
120         case VCMD_set_vhi_name:
121                 return vc_set_vhi_name(vxi, data);
122
123         case VCMD_ctx_stat:
124                 return vc_ctx_stat(vxi, data);
125         case VCMD_virt_stat:
126                 return vc_virt_stat(vxi, data);
127         case VCMD_sock_stat:
128                 return vc_sock_stat(vxi, data);
129         case VCMD_rlimit_stat:
130                 return vc_rlimit_stat(vxi, data);
131
132         case VCMD_set_cflags:
133                 return vc_set_cflags(vxi, data);
134         case VCMD_get_cflags:
135                 return vc_get_cflags(vxi, data);
136
            /* unlike set_space, the ccaps versions have separate handlers */
137         case VCMD_set_ccaps_v0:
138                 return vc_set_ccaps_v0(vxi, data);
139         /* this is version 1 */
140         case VCMD_set_ccaps:
141                 return vc_set_ccaps(vxi, data);
142         case VCMD_get_ccaps_v0:
143                 return vc_get_ccaps_v0(vxi, data);
144         /* this is version 1 */
145         case VCMD_get_ccaps:
146                 return vc_get_ccaps(vxi, data);
147         case VCMD_set_bcaps:
148                 return vc_set_bcaps(vxi, data);
149         case VCMD_get_bcaps:
150                 return vc_get_bcaps(vxi, data);
151
152         case VCMD_set_nflags:
153                 return vc_set_nflags(nxi, data);
154         case VCMD_get_nflags:
155                 return vc_get_nflags(nxi, data);
156
157         case VCMD_set_ncaps:
158                 return vc_set_ncaps(nxi, data);
159         case VCMD_get_ncaps:
160                 return vc_get_ncaps(nxi, data);
161
            /* set_sched_v2 is only dispatched with CONFIG_VSERVER_LEGACY */
162 #ifdef  CONFIG_VSERVER_LEGACY
163         case VCMD_set_sched_v2:
164                 return vc_set_sched_v2(vxi, data);
165 #endif
166         case VCMD_set_sched_v3:
167                 return vc_set_sched_v3(vxi, data);
168         case VCMD_set_sched_v4:
169                 return vc_set_sched_v4(vxi, data);
170         /* this is version 5 */
171         case VCMD_set_sched:
172                 return vc_set_sched(vxi, data);
173         case VCMD_get_sched:
174                 return vc_get_sched(vxi, data);
175         case VCMD_sched_info:
176                 return vc_sched_info(vxi, data);
177
            /* the dlimit handlers take the raw id, not a looked-up context */
178         case VCMD_add_dlimit:
179                 return __COMPAT(vc_add_dlimit, id, data, compat);
180         case VCMD_rem_dlimit:
181                 return __COMPAT(vc_rem_dlimit, id, data, compat);
182         case VCMD_set_dlimit:
183                 return __COMPAT(vc_set_dlimit, id, data, compat);
184         case VCMD_get_dlimit:
185                 return __COMPAT(vc_get_dlimit, id, data, compat);
186
187         case VCMD_ctx_kill:
188                 return vc_ctx_kill(vxi, data);
189
190         case VCMD_wait_exit:
191                 return vc_wait_exit(vxi, data);
192
            /* legacy create: same call as VCMD_ctx_create_v0, i.e. no data */
193 #ifdef  CONFIG_VSERVER_LEGACY
194         case VCMD_create_context:
195                 return vc_ctx_create(id, NULL);
196 #endif
197
198         case VCMD_get_iattr:
199                 return __COMPAT(vc_get_iattr, id, data, compat);
200         case VCMD_set_iattr:
201                 return __COMPAT(vc_set_iattr, id, data, compat);
202
            /* the v0 variants below call the current handler with NULL data */
203         case VCMD_enter_space_v0:
204                 return vc_enter_space(vxi, NULL);
205         /* this is version 1 */
206         case VCMD_enter_space:
207                 return vc_enter_space(vxi, data);
208
209         case VCMD_ctx_create_v0:
210                 return vc_ctx_create(id, NULL);
211         case VCMD_ctx_create:
212                 return vc_ctx_create(id, data);
213         case VCMD_ctx_migrate_v0:
214                 return vc_ctx_migrate(vxi, NULL);
215         case VCMD_ctx_migrate:
216                 return vc_ctx_migrate(vxi, data);
217
218         case VCMD_net_create_v0:
219                 return vc_net_create(id, NULL);
220         case VCMD_net_create:
221                 return vc_net_create(id, data);
222         case VCMD_net_migrate:
223                 return vc_net_migrate(nxi, data);
224         case VCMD_net_add:
225                 return vc_net_add(nxi, data);
226         case VCMD_net_remove:
227                 return vc_net_remove(nxi, data);
228
            /* optional command groups, each compiled in by its own option */
229 #ifdef  CONFIG_VSERVER_HISTORY
230         case VCMD_dump_history:
231                 return vc_dump_history(id);
232         case VCMD_read_history:
233                 return __COMPAT(vc_read_history, id, data, compat);
234 #endif
235 #ifdef  CONFIG_VSERVER_MONITOR
236         case VCMD_read_monitor:
237                 return __COMPAT(vc_read_monitor, id, data, compat);
238 #endif
239 #ifdef  CONFIG_VSERVER_LEGACY
240         case VCMD_new_s_context:
241                 return vc_new_s_context(id, data);
242 #endif
243 #ifdef  CONFIG_VSERVER_LEGACYNET
244         case VCMD_set_ipv4root:
245                 return vc_set_ipv4root(id, data);
246 #endif
            /* no handler: log the decoded command and fall out to -ENOSYS */
247         default:
248                 vxwprintk(1, "unimplemented VCMD_%02d_%d[%d]",
249                         VC_CATEGORY(cmd), VC_COMMAND(cmd), VC_VERSION(cmd));
250         }
251         return -ENOSYS;
252 }
253
254
/*
 * __VCMD(vcmd, _perm, _args, _flags) - one row of the command table in
 * do_vserver().
 *
 * Expands to a complete switch case for VCMD_<vcmd> that stores the three
 * values in the local variables perm, args and flags and then breaks out
 * of the switch.  It can therefore only be used inside a switch statement
 * in a function that declares those three locals.  The expansion ends in
 * a bare "break", so every use has to supply the terminating semicolon.
 */
255 #define __VCMD(vcmd, _perm, _args, _flags)              \
256         case VCMD_ ## vcmd: perm = _perm;               \
257                 args = _args; flags = _flags; break
258
259
/*
 * VCA_*: which context objects do_vserver() looks up (by id) before the
 * command is dispatched.
 *   VCA_VXI  look up a struct vx_info via lookup_vx_info(id)
 *   VCA_NXI  look up a struct nx_info via lookup_nx_info(id)
 */
260 #define VCA_NONE        0x00
261 #define VCA_VXI         0x01
262 #define VCA_NXI         0x02
263
/*
 * VCF_*: per command flags evaluated by do_vserver().
 *   VCF_INFO   set on the informational commands; not tested by any code
 *              in this file
 *   VCF_ADMIN  caller must pass vx_check(0, VS_ADMIN), and a looked-up
 *              context must carry its STATE_ADMIN flag
 *   VCF_ARES   tested as (flags & VCF_ARES) to require CAP_SYS_RESOURCE;
 *              the value contains the VCF_ADMIN bit
 *   VCF_SETUP  a failed VCF_ADMIN check is overridden when
 *              vx_flags(VXF_STATE_SETUP, 0) is true
 */
264 #define VCF_NONE        0x00
265 #define VCF_INFO        0x01
266 #define VCF_ADMIN       0x02
267 #define VCF_ARES        0x06    /* includes admin */
268 #define VCF_SETUP       0x08
269
/* with VCF_ZIDOK an id of zero skips the context lookups altogether */
270 #define VCF_ZIDOK       0x10    /* zero id okay */
271
272
/*
 * do_vserver - common body of the vserver syscall: classify the command,
 * run the permission checks, look up the addressed context(s), dispatch
 * to do_vcmd() and drop the context references again.
 *
 * @cmd:    command word, matched against the VCMD_* constants
 * @id:     command argument; used as the key for lookup_vx_info() and
 *          lookup_nx_info() when the command's args request a lookup
 * @data:   user space pointer, handed to do_vcmd() unchanged
 * @compat: handed to do_vcmd() unchanged (0 from sys_vserver(),
 *          1 from sys32_vserver())
 *
 * Returns the result of do_vcmd(), or one of
 *   -ENOSYS  command not in the table below, or caller lacks CAP_CONTEXT
 *   -EPERM   a capability check or the admin/setup check failed
 *   -ESRCH   lookup_vx_info() or lookup_nx_info() returned NULL
 *   -EACCES  VCF_ADMIN command on a context without its STATE_ADMIN flag
 * or the early results (0 or 1) of the "legacy exceptions" switch.
 *
 * 'state' only records how far the function got; it is read solely by
 * the closing vxdprintk().  Note that 'ret' is always set *before* the
 * test that may jump to a label, so the statement order matters.
 */
273 static inline
274 long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat)
275 {
276         long ret;
277         int permit = -1, state = 0;
278         int perm = -1, args = 0, flags = 0;
279         struct vx_info *vxi = NULL;
280         struct nx_info *nxi = NULL;
281
            /*
             * Command table: each __VCMD() row is a case that fills in
             * perm, args and flags.  Of the perm levels only two
             * properties are tested below: perm < 0 (unknown command)
             * and perm > 1 (requires CAP_SYS_ADMIN).
             */
282         switch (cmd) {
283         /* unprivileged commands */
284         __VCMD(get_version,      0, VCA_NONE,   0);
285         __VCMD(get_vci,          0, VCA_NONE,   0);
286         __VCMD(get_rlimit_mask,  0, VCA_NONE,   0);
287         __VCMD(get_space_mask,   0, VCA_NONE,   0);
288
289         /* info commands */
290         __VCMD(task_xid,         2, VCA_NONE,   0);
291         __VCMD(reset_minmax,     2, VCA_VXI,    0);
292         __VCMD(vx_info,          3, VCA_VXI,    VCF_INFO);
293         __VCMD(get_bcaps,        3, VCA_VXI,    VCF_INFO);
294         __VCMD(get_ccaps_v0,     3, VCA_VXI,    VCF_INFO);
295         __VCMD(get_ccaps,        3, VCA_VXI,    VCF_INFO);
296         __VCMD(get_cflags,       3, VCA_VXI,    VCF_INFO);
297         __VCMD(get_vhi_name,     3, VCA_VXI,    VCF_INFO);
298         __VCMD(get_rlimit,       3, VCA_VXI,    VCF_INFO);
299
300         __VCMD(ctx_stat,         3, VCA_VXI,    VCF_INFO);
301         __VCMD(virt_stat,        3, VCA_VXI,    VCF_INFO);
302         __VCMD(sock_stat,        3, VCA_VXI,    VCF_INFO);
303         __VCMD(rlimit_stat,      3, VCA_VXI,    VCF_INFO);
304
305         __VCMD(task_nid,         2, VCA_NONE,   0);
306         __VCMD(nx_info,          3, VCA_NXI,    VCF_INFO);
307         __VCMD(get_ncaps,        3, VCA_NXI,    VCF_INFO);
308         __VCMD(get_nflags,       3, VCA_NXI,    VCF_INFO);
309
310         __VCMD(get_iattr,        2, VCA_NONE,   0);
311         __VCMD(get_dlimit,       3, VCA_NONE,   VCF_INFO);
312         __VCMD(get_sched,        3, VCA_VXI,    VCF_INFO);
313         __VCMD(sched_info,       3, VCA_VXI,    VCF_INFO|VCF_ZIDOK);
314
315         /* lower admin commands */
316         __VCMD(wait_exit,        4, VCA_VXI,    VCF_INFO);
317         __VCMD(ctx_create_v0,    5, VCA_NONE,   0);
318         __VCMD(ctx_create,       5, VCA_NONE,   0);
319         __VCMD(ctx_migrate_v0,   5, VCA_VXI,    VCF_ADMIN);
320         __VCMD(ctx_migrate,      5, VCA_VXI,    VCF_ADMIN);
321         __VCMD(enter_space_v0,   5, VCA_VXI,    VCF_ADMIN);
322         __VCMD(enter_space,      5, VCA_VXI,    VCF_ADMIN);
323
324         __VCMD(net_create_v0,    5, VCA_NONE,   0);
325         __VCMD(net_create,       5, VCA_NONE,   0);
326         __VCMD(net_migrate,      5, VCA_NXI,    VCF_ADMIN);
327
328         /* higher admin commands */
329         __VCMD(ctx_kill,         6, VCA_VXI,    VCF_ARES);
330         __VCMD(set_space_v0,     7, VCA_VXI,    VCF_ARES|VCF_SETUP);
331         __VCMD(set_space,        7, VCA_VXI,    VCF_ARES|VCF_SETUP);
332
333         __VCMD(set_ccaps_v0,     7, VCA_VXI,    VCF_ARES|VCF_SETUP);
334         __VCMD(set_ccaps,        7, VCA_VXI,    VCF_ARES|VCF_SETUP);
335         __VCMD(set_bcaps,        7, VCA_VXI,    VCF_ARES|VCF_SETUP);
336         __VCMD(set_cflags,       7, VCA_VXI,    VCF_ARES|VCF_SETUP);
337
338         __VCMD(set_vhi_name,     7, VCA_VXI,    VCF_ARES|VCF_SETUP);
339         __VCMD(set_rlimit,       7, VCA_VXI,    VCF_ARES|VCF_SETUP);
340         __VCMD(set_sched,        7, VCA_VXI,    VCF_ARES|VCF_SETUP);
            /*
             * set_sched_v2 is listed unconditionally here, while do_vcmd()
             * only handles it under CONFIG_VSERVER_LEGACY; without that
             * option it passes the checks and ends in do_vcmd()'s default.
             */
341         __VCMD(set_sched_v2,     7, VCA_VXI,    VCF_ARES|VCF_SETUP);
342         __VCMD(set_sched_v3,     7, VCA_VXI,    VCF_ARES|VCF_SETUP);
343         __VCMD(set_sched_v4,     7, VCA_VXI,    VCF_ARES|VCF_SETUP);
344
345         __VCMD(set_ncaps,        7, VCA_NXI,    VCF_ARES|VCF_SETUP);
346         __VCMD(set_nflags,       7, VCA_NXI,    VCF_ARES|VCF_SETUP);
347         __VCMD(net_add,          8, VCA_NXI,    VCF_ARES|VCF_SETUP);
348         __VCMD(net_remove,       8, VCA_NXI,    VCF_ARES|VCF_SETUP);
349
350         __VCMD(set_iattr,        7, VCA_NONE,   0);
351         __VCMD(set_dlimit,       7, VCA_NONE,   VCF_ARES);
352         __VCMD(add_dlimit,       8, VCA_NONE,   VCF_ARES);
353         __VCMD(rem_dlimit,       8, VCA_NONE,   VCF_ARES);
354
355         /* debug level admin commands */
356 #ifdef  CONFIG_VSERVER_HISTORY
357         __VCMD(dump_history,     9, VCA_NONE,   0);
358         __VCMD(read_history,     9, VCA_NONE,   0);
359 #endif
360 #ifdef  CONFIG_VSERVER_MONITOR
361         __VCMD(read_monitor,     9, VCA_NONE,   0);
362 #endif
363
364         /* legacy commands */
365 #ifdef  CONFIG_VSERVER_LEGACY
366         __VCMD(new_s_context,    1, VCA_NONE,   0);
367         __VCMD(create_context,   5, VCA_NONE,   0);
368 #endif
369 #ifdef  CONFIG_VSERVER_LEGACYNET
370         __VCMD(set_ipv4root,     5, VCA_NONE,   0);
371 #endif
            /* not in the table: perm stays negative, rejected below */
372         default:
373                 perm = -1;
374         }
375
376         vxdprintk(VXD_CBIT(switch, 0),
377                 "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]",
378                 VC_CATEGORY(cmd), VC_COMMAND(cmd),
379                 VC_VERSION(cmd), id, data, compat,
380                 perm, args, flags);
381
382         ret = -ENOSYS;
383         if (perm < 0)
384                 goto out;
385
            /*
             * state 1: every command needs CAP_CONTEXT.  ret is still
             * -ENOSYS here, so a caller without the capability gets the
             * same answer as for an unknown command.  With
             * CONFIG_VSERVER_LEGACY, new_s_context with id -2 is exempt.
             */
386         state = 1;
387 #ifdef  CONFIG_VSERVER_LEGACY
388         if (!capable(CAP_CONTEXT) &&
389                 /* dirty hack for capremove */
390                 !(cmd==VCMD_new_s_context && id==-2))
391                 goto out;
392 #else
393         if (!capable(CAP_CONTEXT))
394                 goto out;
395 #endif
396
            /* state 2: perm levels above 1 additionally need CAP_SYS_ADMIN */
397         state = 2;
398         /* moved here from the individual commands */
399         ret = -EPERM;
400         if ((perm > 1) && !capable(CAP_SYS_ADMIN))
401                 goto out;
402
            /*
             * state 3: NOTE(review): VCF_ARES is 0x06 and VCF_ADMIN is
             * 0x02, so (flags & VCF_ARES) is also non-zero for commands
             * flagged with plain VCF_ADMIN; those are held to
             * CAP_SYS_RESOURCE as well -- confirm this is intended.
             */
403         state = 3;
404         /* vcmd involves resource management  */
405         ret = -EPERM;
406         if ((flags & VCF_ARES) && !capable(CAP_SYS_RESOURCE))
407                 goto out;
408
            /*
             * state 4: special cases that either finish the call right
             * here or adjust id before the generic path continues.
             */
409         state = 4;
410         /* various legacy exceptions */
411         switch (cmd) {
412 #ifdef  CONFIG_VSERVER_LEGACY
            /* report success without doing anything if VS_WATCH matches */
413         case VCMD_set_cflags:
414         case VCMD_set_ccaps_v0:
415                 ret = 0;
416                 if (vx_check(0, VS_WATCH))
417                         goto out;
418                 break;
419
            /* legacy only: ctx_create_v0 shares the id == 1 case below */
420         case VCMD_ctx_create_v0:
421 #endif
422         /* will go away when spectator is a cap */
423         case VCMD_ctx_migrate_v0:
424         case VCMD_ctx_migrate:
                    /* id 1: set current's xid directly, no lookup, return 1 */
425                 if (id == 1) {
426                         current->xid = 1;
427                         ret = 1;
428                         goto out;
429                 }
430                 break;
431
432         /* will go away when spectator is a cap */
433         case VCMD_net_migrate:
                    /* id 1: set current's nid directly, no lookup, return 1 */
434                 if (id == 1) {
435                         current->nid = 1;
436                         ret = 1;
437                         goto out;
438                 }
439                 break;
440
441         /* legacy special casing */
            /* the caller supplied id is replaced by -1 for set_space_v0 */
442         case VCMD_set_space_v0:
443                 id = -1;
444                 break;
445         }
446
447         /* vcmds are fine by default */
448         permit = 1;
449
450         /* admin type vcmds require admin ... */
451         if (flags & VCF_ADMIN)
452                 permit = vx_check(0, VS_ADMIN) ? 1 : 0;
453
454         /* ... but setup type vcmds override that */
455         if (!permit && (flags & VCF_SETUP))
456                 permit = vx_flags(VXF_STATE_SETUP, 0) ? 2 : 0;
457
            /* state 5: permit is 0 (denied), 1 (plain/admin) or 2 (setup) */
458         state = 5;
459         ret = -EPERM;
460         if (!permit)
461                 goto out;
462
            /* state 6: id zero with VCF_ZIDOK dispatches with NULL vxi/nxi */
463         state = 6;
464         if (!id && (flags & VCF_ZIDOK))
465                 goto skip_id;
466
            /* the lookup results are released again at out_vxi / out_nxi */
467         ret = -ESRCH;
468         if (args & VCA_VXI) {
469                 vxi = lookup_vx_info(id);
470                 if (!vxi)
471                         goto out;
472
473                 if ((flags & VCF_ADMIN) &&
474                         /* special case kill for shutdown */
475                         (cmd != VCMD_ctx_kill) &&
476                         /* can context be administrated? */
477                         !vx_info_flags(vxi, VXF_STATE_ADMIN, 0)) {
478                         ret = -EACCES;
479                         goto out_vxi;
480                 }
481         }
            /* state 7: ret is still -ESRCH unless the block above ran clean */
482         state = 7;
483         if (args & VCA_NXI) {
484                 nxi = lookup_nx_info(id);
                    /* go via out_vxi so an already looked-up vxi is put */
485                 if (!nxi)
486                         goto out_vxi;
487
488                 if ((flags & VCF_ADMIN) &&
489                         /* can context be administrated? */
490                         !nx_info_flags(nxi, NXF_STATE_ADMIN, 0)) {
491                         ret = -EACCES;
492                         goto out_nxi;
493                 }
494         }
495 skip_id:
            /* state 8: all checks passed, run the actual command */
496         state = 8;
497         ret = do_vcmd(cmd, id, vxi, nxi, data, compat);
498
            /* cleanup ladder: release in reverse order of acquisition */
499 out_nxi:
500         if ((args & VCA_NXI) && nxi)
501                 put_nx_info(nxi);
502 out_vxi:
503         if ((args & VCA_VXI) && vxi)
504                 put_vx_info(vxi);
505 out:
506         vxdprintk(VXD_CBIT(switch, 1),
507                 "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]",
508                 VC_CATEGORY(cmd), VC_COMMAND(cmd),
509                 VC_VERSION(cmd), ret, ret, state, permit);
510         return ret;
511 }
512
/*
 * sys_vserver - native entry point of the vserver syscall.
 *
 * Forwards cmd, id and data to do_vserver() with compat = 0 and returns
 * its result unchanged.
 */
513 asmlinkage long
514 sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
515 {
516         return do_vserver(cmd, id, data, 0);
517 }
518
519 #ifdef  CONFIG_COMPAT
520
/*
 * sys32_vserver - compat entry point of the vserver syscall (only built
 * with CONFIG_COMPAT).
 *
 * Forwards cmd, id and data to do_vserver() with compat = 1, which makes
 * the __COMPAT() wrapped handlers in do_vcmd() use their _x32 variants,
 * and returns its result unchanged.
 */
521 asmlinkage long
522 sys32_vserver(uint32_t cmd, uint32_t id, void __user *data)
523 {
524         return do_vserver(cmd, id, data, 1);
525 }
526
527 #endif  /* CONFIG_COMPAT */