In BuildKernel() update from
[linux-2.6.git] / kernel / vserver / switch.c
1 /*
2  *  linux/kernel/vserver/switch.c
3  *
4  *  Virtual Server: Syscall Switch
5  *
6  *  Copyright (C) 2003-2007  Herbert Pötzl
7  *
8  *  V0.01  syscall switch
9  *  V0.02  added signal to context
10  *  V0.03  added rlimit functions
11  *  V0.04  added iattr, task/xid functions
12  *  V0.05  added debug/history stuff
13  *  V0.06  added compat32 layer
14  *  V0.07  vcmd args and perms
15  *  V0.08  added status commands
16  *
17  */
18
19 #include <linux/linkage.h>
20 #include <linux/sched.h>
21 #include <linux/compat.h>
22 #include <asm/errno.h>
23
24 #include <linux/vs_context.h>
25 #include <linux/vs_network.h>
26 #include <linux/vserver/switch.h>
27
28 #include "vci_config.h"
29
30 static inline
31 int vc_get_version(uint32_t id)
32 {
33 #ifdef  CONFIG_VSERVER_LEGACY_VERSION
34         if (id == 63)
35                 return VCI_LEGACY_VERSION;
36 #endif
37         return VCI_VERSION;
38 }
39
/*
 * vc_get_vci - answer VCMD_get_vci.
 *
 * Returns whatever vci_kernel_config() reports; the id argument is
 * accepted for symmetry with the other handlers but is not used.
 */
static inline
int vc_get_vci(uint32_t id)
{
	int config = vci_kernel_config();

	return config;
}
45
46 #include <linux/vserver/context_cmd.h>
47 #include <linux/vserver/cvirt_cmd.h>
48 #include <linux/vserver/cacct_cmd.h>
49 #include <linux/vserver/limit_cmd.h>
50 #include <linux/vserver/network_cmd.h>
51 #include <linux/vserver/sched_cmd.h>
52 #include <linux/vserver/debug_cmd.h>
53 #include <linux/vserver/inode_cmd.h>
54 #include <linux/vserver/dlimit_cmd.h>
55 #include <linux/vserver/signal_cmd.h>
56 #include <linux/vserver/space_cmd.h>
57
58 #include <linux/vserver/legacy.h>
59 #include <linux/vserver/inode.h>
60 #include <linux/vserver/dlimit.h>
61
62
/*
 * __COMPAT(name, id, data, compat) - call a vcmd handler.
 *
 * With CONFIG_COMPAT the call goes to name##_x32(id, data) when
 * 'compat' is non-zero and to name(id, data) otherwise.  Without
 * CONFIG_COMPAT 'compat' is ignored (not even evaluated) and
 * name(id, data) is always called.
 *
 * Both expansions are fully parenthesized so the macro can be used as
 * an operand of a larger expression without the conditional operator
 * capturing neighbouring operators.
 */
#ifdef	CONFIG_COMPAT
#define __COMPAT(name, id, data, compat)	\
	((compat) ? name ## _x32 (id, data) : name (id, data))
#else
#define __COMPAT(name, id, data, compat)	\
	(name (id, data))
#endif
70
71
/*
 * do_vcmd - dispatch one vserver command to its handler.
 *
 * @cmd:    command word; selects the case below
 * @id:     id argument of the syscall, handed to handlers that work by id
 * @vxi:    context info handed to handlers that take a vx_info
 *          (may be NULL, see do_vserver)
 * @nxi:    network info handed to handlers that take an nx_info
 *          (may be NULL, see do_vserver)
 * @data:   user-space argument pointer, passed through untouched
 * @compat: non-zero for callers coming in through sys32_vserver; only
 *          looked at by the handlers invoked via __COMPAT()
 *
 * Returns the handler's return value.  A command without a case here
 * is reported through vxwprintk() and yields -ENOSYS.
 *
 * No permission checking happens here; that is done by do_vserver
 * before it calls this function.
 */
72 static inline
73 long do_vcmd(uint32_t cmd, uint32_t id,
74         struct vx_info *vxi, struct nx_info *nxi,
75         void __user *data, int compat)
76 {
77         switch (cmd) {
78
79         case VCMD_get_version:
80                 return vc_get_version(id);
81         case VCMD_get_vci:
82                 return vc_get_vci(id);
83
84         case VCMD_task_xid:
85                 return vc_task_xid(id, data);
86         case VCMD_vx_info:
87                 return vc_vx_info(vxi, data);
88
89         case VCMD_task_nid:
90                 return vc_task_nid(id, data);
91         case VCMD_nx_info:
92                 return vc_nx_info(nxi, data);
93
        /* v0 and v1 of set_space share one handler (intentional fallthrough) */
94         case VCMD_set_space_v0:
95         /* this is version 1 */
96         case VCMD_set_space:
97                 return vc_set_space(vxi, data);
98
99         case VCMD_get_space_mask:
100                 return vc_get_space_mask(vxi, data);
101
        /*
         * rlimit get/set go through __COMPAT() only when
         * CONFIG_IA32_EMULATION is set; otherwise the native handlers
         * are always used, regardless of 'compat'.
         */
102 #ifdef  CONFIG_IA32_EMULATION
103         case VCMD_get_rlimit:
104                 return __COMPAT(vc_get_rlimit, vxi, data, compat);
105         case VCMD_set_rlimit:
106                 return __COMPAT(vc_set_rlimit, vxi, data, compat);
107 #else
108         case VCMD_get_rlimit:
109                 return vc_get_rlimit(vxi, data);
110         case VCMD_set_rlimit:
111                 return vc_set_rlimit(vxi, data);
112 #endif
113         case VCMD_get_rlimit_mask:
114                 return vc_get_rlimit_mask(id, data);
115         case VCMD_reset_minmax:
116                 return vc_reset_minmax(vxi, data);
117
118         case VCMD_get_vhi_name:
119                 return vc_get_vhi_name(vxi, data);
120         case VCMD_set_vhi_name:
121                 return vc_set_vhi_name(vxi, data);
122
123         case VCMD_ctx_stat:
124                 return vc_ctx_stat(vxi, data);
125         case VCMD_virt_stat:
126                 return vc_virt_stat(vxi, data);
127         case VCMD_sock_stat:
128                 return vc_sock_stat(vxi, data);
129         case VCMD_rlimit_stat:
130                 return vc_rlimit_stat(vxi, data);
131
132         case VCMD_set_cflags:
133                 return vc_set_cflags(vxi, data);
134         case VCMD_get_cflags:
135                 return vc_get_cflags(vxi, data);
136
137         case VCMD_set_ccaps_v0:
138                 return vc_set_ccaps_v0(vxi, data);
139         /* this is version 1 */
140         case VCMD_set_ccaps:
141                 return vc_set_ccaps(vxi, data);
142         case VCMD_get_ccaps_v0:
143                 return vc_get_ccaps_v0(vxi, data);
144         /* this is version 1 */
145         case VCMD_get_ccaps:
146                 return vc_get_ccaps(vxi, data);
147         case VCMD_set_bcaps:
148                 return vc_set_bcaps(vxi, data);
149         case VCMD_get_bcaps:
150                 return vc_get_bcaps(vxi, data);
151
152         case VCMD_set_nflags:
153                 return vc_set_nflags(nxi, data);
154         case VCMD_get_nflags:
155                 return vc_get_nflags(nxi, data);
156
157         case VCMD_set_ncaps:
158                 return vc_set_ncaps(nxi, data);
159         case VCMD_get_ncaps:
160                 return vc_get_ncaps(nxi, data);
161
        /* the v2 scheduler call is only dispatched on legacy builds */
162 #ifdef  CONFIG_VSERVER_LEGACY
163         case VCMD_set_sched_v2:
164                 return vc_set_sched_v2(vxi, data);
165 #endif
166         case VCMD_set_sched_v3:
167                 return vc_set_sched_v3(vxi, data);
168         case VCMD_set_sched_v4:
169                 return vc_set_sched_v4(vxi, data);
170         /* this is version 5 */
171         case VCMD_set_sched:
172                 return vc_set_sched(vxi, data);
173         case VCMD_get_sched:
174                 return vc_get_sched(vxi, data);
175         case VCMD_sched_info:
176                 return vc_sched_info(vxi, data);
177
        /* disk limit handlers work by id and have _x32 variants */
178         case VCMD_add_dlimit:
179                 return __COMPAT(vc_add_dlimit, id, data, compat);
180         case VCMD_rem_dlimit:
181                 return __COMPAT(vc_rem_dlimit, id, data, compat);
182         case VCMD_set_dlimit:
183                 return __COMPAT(vc_set_dlimit, id, data, compat);
184         case VCMD_get_dlimit:
185                 return __COMPAT(vc_get_dlimit, id, data, compat);
186
187         case VCMD_ctx_kill:
188                 return vc_ctx_kill(vxi, data);
189
190         case VCMD_wait_exit:
191                 return vc_wait_exit(vxi, data);
192
        /* legacy create_context is ctx_create without an argument block */
193 #ifdef  CONFIG_VSERVER_LEGACY
194         case VCMD_create_context:
195                 return vc_ctx_create(id, NULL);
196 #endif
197
198         case VCMD_get_iattr:
199                 return __COMPAT(vc_get_iattr, id, data, compat);
200         case VCMD_set_iattr:
201                 return __COMPAT(vc_set_iattr, id, data, compat);
202
203         case VCMD_fget_iattr:
204                 return vc_fget_iattr(id, data);
205         case VCMD_fset_iattr:
206                 return vc_fset_iattr(id, data);
207
        /* the _v0 variants below call the current handler with NULL data */
208         case VCMD_enter_space_v0:
209                 return vc_enter_space(vxi, NULL);
210         /* this is version 1 */
211         case VCMD_enter_space:
212                 return vc_enter_space(vxi, data);
213
214         case VCMD_ctx_create_v0:
215                 return vc_ctx_create(id, NULL);
216         case VCMD_ctx_create:
217                 return vc_ctx_create(id, data);
218         case VCMD_ctx_migrate_v0:
219                 return vc_ctx_migrate(vxi, NULL);
220         case VCMD_ctx_migrate:
221                 return vc_ctx_migrate(vxi, data);
222
223         case VCMD_net_create_v0:
224                 return vc_net_create(id, NULL);
225         case VCMD_net_create:
226                 return vc_net_create(id, data);
227         case VCMD_net_migrate:
228                 return vc_net_migrate(nxi, data);
229         case VCMD_net_add:
230                 return vc_net_add(nxi, data);
231         case VCMD_net_remove:
232                 return vc_net_remove(nxi, data);
233
234 #ifdef  CONFIG_VSERVER_HISTORY
235         case VCMD_dump_history:
236                 return vc_dump_history(id);
237         case VCMD_read_history:
238                 return __COMPAT(vc_read_history, id, data, compat);
239 #endif
240 #ifdef  CONFIG_VSERVER_MONITOR
241         case VCMD_read_monitor:
242                 return __COMPAT(vc_read_monitor, id, data, compat);
243 #endif
244 #ifdef  CONFIG_VSERVER_LEGACY
245         case VCMD_new_s_context:
246                 return vc_new_s_context(id, data);
247 #endif
248 #ifdef  CONFIG_VSERVER_LEGACYNET
249         case VCMD_set_ipv4root:
250                 return vc_set_ipv4root(id, data);
251 #endif
        /* no handler compiled in: log it, then fall out to -ENOSYS */
252         default:
253                 vxwprintk(1, "unimplemented VCMD_%02d_%d[%d]",
254                         VC_CATEGORY(cmd), VC_COMMAND(cmd), VC_VERSION(cmd));
255         }
256         return -ENOSYS;
257 }
258
259
/*
 * __VCMD(vcmd, _perm, _args, _flags) - one row of the command table.
 *
 * Expands to a 'case VCMD_<vcmd>:' label that stores the three values
 * into the local variables 'perm', 'args' and 'flags' and then breaks,
 * so it is only usable inside a switch in a function that declares
 * those locals (do_vserver).  Because the expansion starts with a case
 * label it cannot be wrapped in do { } while (0).
 */
260 #define __VCMD(vcmd, _perm, _args, _flags)              \
261         case VCMD_ ## vcmd: perm = _perm;               \
262                 args = _args; flags = _flags; break
263
264
/*
 * VCA_*: values for the 'args' column.  do_vserver looks up a vx_info
 * for the given id when VCA_VXI is set and an nx_info when VCA_NXI is
 * set, and releases them again after the command has run.
 */
265 #define VCA_NONE        0x00
266 #define VCA_VXI         0x01
267 #define VCA_NXI         0x02
268
/*
 * VCF_*: values for the 'flags' column, as used by do_vserver:
 *   VCF_ADMIN - caller must pass vx_check(0, VS_ADMIN) and the looked-up
 *               target must have its STATE_ADMIN flag set
 *   VCF_ARES  - caller additionally needs CAP_SYS_RESOURCE; the value
 *               contains the VCF_ADMIN bit
 *   VCF_SETUP - a failed admin check is overridden when
 *               vx_flags(VXF_STATE_SETUP, 0) is true
 *   VCF_INFO  - not tested by any code in this file
 */
269 #define VCF_NONE        0x00
270 #define VCF_INFO        0x01
271 #define VCF_ADMIN       0x02
272 #define VCF_ARES        0x06    /* includes admin */
273 #define VCF_SETUP       0x08
274
/* with VCF_ZIDOK an id of zero skips the vx_info/nx_info lookup */
275 #define VCF_ZIDOK       0x10    /* zero id okay */
276
277
/*
 * do_vserver - common body of the vserver syscall entry points.
 *
 * @cmd:    command word
 * @id:     id argument (context/network id for most commands)
 * @data:   user-space argument pointer, passed on to the handler
 * @compat: 0 when called from sys_vserver, 1 from sys32_vserver
 *
 * Looks up the permission level, argument kind and flags for @cmd,
 * runs the checks below in order and, if all pass, calls do_vcmd().
 *
 * Return values produced here (otherwise the result of do_vcmd()):
 *   -ENOSYS  command not in the table, or caller lacks CAP_CONTEXT
 *   -EPERM   perm > 1 without CAP_SYS_ADMIN, a VCF_ARES bit set without
 *            CAP_SYS_RESOURCE, or the admin/setup check failed
 *   -ESRCH   lookup_vx_info()/lookup_nx_info() returned NULL
 *   -EACCES  VCF_ADMIN command on a target without its STATE_ADMIN flag
 *   1        ctx/net migrate with id 1 (handled inline, see below)
 *   0        legacy set_cflags/set_ccaps_v0 when vx_check(0, VS_WATCH)
 *
 * 'state' and 'permit' only record how far the function got; they are
 * used for nothing but the debug message at the end.
 */
278 static inline
279 long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat)
280 {
281         long ret;
282         int permit = -1, state = 0;
283         int perm = -1, args = 0, flags = 0;
284         struct vx_info *vxi = NULL;
285         struct nx_info *nxi = NULL;
286
        /* command table: __VCMD(name, perm, args, flags), see __VCMD */
287         switch (cmd) {
288         /* unprivileged commands */
289         __VCMD(get_version,      0, VCA_NONE,   0);
290         __VCMD(get_vci,          0, VCA_NONE,   0);
291         __VCMD(get_rlimit_mask,  0, VCA_NONE,   0);
292         __VCMD(get_space_mask,   0, VCA_NONE,   0);
293
294         /* info commands */
295         __VCMD(task_xid,         2, VCA_NONE,   0);
296         __VCMD(reset_minmax,     2, VCA_VXI,    0);
297         __VCMD(vx_info,          3, VCA_VXI,    VCF_INFO);
298         __VCMD(get_bcaps,        3, VCA_VXI,    VCF_INFO);
299         __VCMD(get_ccaps_v0,     3, VCA_VXI,    VCF_INFO);
300         __VCMD(get_ccaps,        3, VCA_VXI,    VCF_INFO);
301         __VCMD(get_cflags,       3, VCA_VXI,    VCF_INFO);
302         __VCMD(get_vhi_name,     3, VCA_VXI,    VCF_INFO);
303         __VCMD(get_rlimit,       3, VCA_VXI,    VCF_INFO);
304
305         __VCMD(ctx_stat,         3, VCA_VXI,    VCF_INFO);
306         __VCMD(virt_stat,        3, VCA_VXI,    VCF_INFO);
307         __VCMD(sock_stat,        3, VCA_VXI,    VCF_INFO);
308         __VCMD(rlimit_stat,      3, VCA_VXI,    VCF_INFO);
309
310         __VCMD(task_nid,         2, VCA_NONE,   0);
311         __VCMD(nx_info,          3, VCA_NXI,    VCF_INFO);
312         __VCMD(get_ncaps,        3, VCA_NXI,    VCF_INFO);
313         __VCMD(get_nflags,       3, VCA_NXI,    VCF_INFO);
314
315         __VCMD(get_iattr,        2, VCA_NONE,   0);
316         __VCMD(fget_iattr,       2, VCA_NONE,   0);
317         __VCMD(get_dlimit,       3, VCA_NONE,   VCF_INFO);
318         __VCMD(get_sched,        3, VCA_VXI,    VCF_INFO);
319         __VCMD(sched_info,       3, VCA_VXI,    VCF_INFO|VCF_ZIDOK);
320
321         /* lower admin commands */
322         __VCMD(wait_exit,        4, VCA_VXI,    VCF_INFO);
323         __VCMD(ctx_create_v0,    5, VCA_NONE,   0);
324         __VCMD(ctx_create,       5, VCA_NONE,   0);
325         __VCMD(ctx_migrate_v0,   5, VCA_VXI,    VCF_ADMIN);
326         __VCMD(ctx_migrate,      5, VCA_VXI,    VCF_ADMIN);
327         __VCMD(enter_space_v0,   5, VCA_VXI,    VCF_ADMIN);
328         __VCMD(enter_space,      5, VCA_VXI,    VCF_ADMIN);
329
330         __VCMD(net_create_v0,    5, VCA_NONE,   0);
331         __VCMD(net_create,       5, VCA_NONE,   0);
332         __VCMD(net_migrate,      5, VCA_NXI,    VCF_ADMIN);
333
334         /* higher admin commands */
335         __VCMD(ctx_kill,         6, VCA_VXI,    VCF_ARES);
336         __VCMD(set_space_v0,     7, VCA_VXI,    VCF_ARES|VCF_SETUP);
337         __VCMD(set_space,        7, VCA_VXI,    VCF_ARES|VCF_SETUP);
338
339         __VCMD(set_ccaps_v0,     7, VCA_VXI,    VCF_ARES|VCF_SETUP);
340         __VCMD(set_ccaps,        7, VCA_VXI,    VCF_ARES|VCF_SETUP);
341         __VCMD(set_bcaps,        7, VCA_VXI,    VCF_ARES|VCF_SETUP);
342         __VCMD(set_cflags,       7, VCA_VXI,    VCF_ARES|VCF_SETUP);
343
344         __VCMD(set_vhi_name,     7, VCA_VXI,    VCF_ARES|VCF_SETUP);
345         __VCMD(set_rlimit,       7, VCA_VXI,    VCF_ARES|VCF_SETUP);
346         __VCMD(set_sched,        7, VCA_VXI,    VCF_ARES|VCF_SETUP);
347         __VCMD(set_sched_v2,     7, VCA_VXI,    VCF_ARES|VCF_SETUP);
348         __VCMD(set_sched_v3,     7, VCA_VXI,    VCF_ARES|VCF_SETUP);
349         __VCMD(set_sched_v4,     7, VCA_VXI,    VCF_ARES|VCF_SETUP);
350
351         __VCMD(set_ncaps,        7, VCA_NXI,    VCF_ARES|VCF_SETUP);
352         __VCMD(set_nflags,       7, VCA_NXI,    VCF_ARES|VCF_SETUP);
353         __VCMD(net_add,          8, VCA_NXI,    VCF_ARES|VCF_SETUP);
354         __VCMD(net_remove,       8, VCA_NXI,    VCF_ARES|VCF_SETUP);
355
356         __VCMD(set_iattr,        7, VCA_NONE,   0);
357         __VCMD(fset_iattr,       7, VCA_NONE,   0);
358         __VCMD(set_dlimit,       7, VCA_NONE,   VCF_ARES);
359         __VCMD(add_dlimit,       8, VCA_NONE,   VCF_ARES);
360         __VCMD(rem_dlimit,       8, VCA_NONE,   VCF_ARES);
361
362         /* debug level admin commands */
363 #ifdef  CONFIG_VSERVER_HISTORY
364         __VCMD(dump_history,     9, VCA_NONE,   0);
365         __VCMD(read_history,     9, VCA_NONE,   0);
366 #endif
367 #ifdef  CONFIG_VSERVER_MONITOR
368         __VCMD(read_monitor,     9, VCA_NONE,   0);
369 #endif
370
371         /* legacy commands */
372 #ifdef  CONFIG_VSERVER_LEGACY
373         __VCMD(new_s_context,    1, VCA_NONE,   0);
374         __VCMD(create_context,   5, VCA_NONE,   0);
375 #endif
376 #ifdef  CONFIG_VSERVER_LEGACYNET
377         __VCMD(set_ipv4root,     5, VCA_NONE,   0);
378 #endif
        /* not in the table: perm stays negative and -ENOSYS is returned */
379         default:
380                 perm = -1;
381         }
382
383         vxdprintk(VXD_CBIT(switch, 0),
384                 "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]",
385                 VC_CATEGORY(cmd), VC_COMMAND(cmd),
386                 VC_VERSION(cmd), id, data, compat,
387                 perm, args, flags);
388
389         ret = -ENOSYS;
390         if (perm < 0)
391                 goto out;
392
        /*
         * Every known command needs CAP_CONTEXT.  ret is still -ENOSYS
         * here, so a caller without the capability gets the same answer
         * as for an unknown command.  'id' is unsigned, so the -2 in
         * the legacy exception is compared after conversion to uint32_t.
         */
393         state = 1;
394 #ifdef  CONFIG_VSERVER_LEGACY
395         if (!capable(CAP_CONTEXT) &&
396                 /* dirty hack for capremove */
397                 !(cmd==VCMD_new_s_context && id==-2))
398                 goto out;
399 #else
400         if (!capable(CAP_CONTEXT))
401                 goto out;
402 #endif
403
404         state = 2;
405         /* moved here from the individual commands */
406         ret = -EPERM;
407         if ((perm > 1) && !capable(CAP_SYS_ADMIN))
408                 goto out;
409
        /*
         * NOTE(review): VCF_ARES is 0x06 and so contains the VCF_ADMIN
         * bit (0x02); the test below is therefore also true for
         * commands flagged with VCF_ADMIN only (ctx_migrate,
         * enter_space, net_migrate) -- confirm this is intended.
         */
410         state = 3;
411         /* vcmd involves resource management  */
412         ret = -EPERM;
413         if ((flags & VCF_ARES) && !capable(CAP_SYS_RESOURCE))
414                 goto out;
415
416         state = 4;
417         /* various legacy exceptions */
418         switch (cmd) {
419 #ifdef  CONFIG_VSERVER_LEGACY
420         case VCMD_set_cflags:
421         case VCMD_set_ccaps_v0:
422                 ret = 0;
423                 if (vx_check(0, VS_WATCH))
424                         goto out;
425                 break;
426
        /* on legacy builds ctx_create_v0 shares the id == 1 case below */
427         case VCMD_ctx_create_v0:
428 #endif
429         /* will go away when spectator is a cap */
430         case VCMD_ctx_migrate_v0:
431         case VCMD_ctx_migrate:
                /* id 1: set the caller's xid directly, no lookup or handler */
432                 if (id == 1) {
433                         current->xid = 1;
434                         ret = 1;
435                         goto out;
436                 }
437                 break;
438
439         /* will go away when spectator is a cap */
440         case VCMD_net_migrate:
                /* id 1: set the caller's nid directly, no lookup or handler */
441                 if (id == 1) {
442                         current->nid = 1;
443                         ret = 1;
444                         goto out;
445                 }
446                 break;
447
448         /* legacy special casing */
449         case VCMD_set_space_v0:
                /* the caller's id is ignored; -1 wraps to 0xffffffff in uint32_t */
450                 id = -1;
451                 break;
452         }
453
454         /* vcmds are fine by default */
455         permit = 1;
456
457         /* admin type vcmds require admin ... */
458         if (flags & VCF_ADMIN)
459                 permit = vx_check(0, VS_ADMIN) ? 1 : 0;
460
461         /* ... but setup type vcmds override that */
462         if (!permit && (flags & VCF_SETUP))
463                 permit = vx_flags(VXF_STATE_SETUP, 0) ? 2 : 0;
464
465         state = 5;
466         ret = -EPERM;
467         if (!permit)
468                 goto out;
469
        /* zero id is acceptable for VCF_ZIDOK commands: call with NULL infos */
470         state = 6;
471         if (!id && (flags & VCF_ZIDOK))
472                 goto skip_id;
473
        /* look up the objects the handler needs; dropped again below */
474         ret = -ESRCH;
475         if (args & VCA_VXI) {
476                 vxi = lookup_vx_info(id);
477                 if (!vxi)
478                         goto out;
479
480                 if ((flags & VCF_ADMIN) &&
481                         /* special case kill for shutdown */
482                         (cmd != VCMD_ctx_kill) &&
483                         /* can context be administrated? */
484                         !vx_info_flags(vxi, VXF_STATE_ADMIN, 0)) {
485                         ret = -EACCES;
486                         goto out_vxi;
487                 }
488         }
489         state = 7;
490         if (args & VCA_NXI) {
491                 nxi = lookup_nx_info(id);
                /* ret is still -ESRCH; out_vxi releases a vxi taken above */
492                 if (!nxi)
493                         goto out_vxi;
494
495                 if ((flags & VCF_ADMIN) &&
496                         /* can context be administrated? */
497                         !nx_info_flags(nxi, NXF_STATE_ADMIN, 0)) {
498                         ret = -EACCES;
499                         goto out_nxi;
500                 }
501         }
502 skip_id:
503         state = 8;
504         ret = do_vcmd(cmd, id, vxi, nxi, data, compat);
505
        /* cleanup in reverse order; the NULL tests cover the skip_id path */
506 out_nxi:
507         if ((args & VCA_NXI) && nxi)
508                 put_nx_info(nxi);
509 out_vxi:
510         if ((args & VCA_VXI) && vxi)
511                 put_vx_info(vxi);
512 out:
513         vxdprintk(VXD_CBIT(switch, 1),
514                 "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]",
515                 VC_CATEGORY(cmd), VC_COMMAND(cmd),
516                 VC_VERSION(cmd), ret, ret, state, permit);
517         return ret;
518 }
519
520 asmlinkage long
521 sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
522 {
523         return do_vserver(cmd, id, data, 0);
524 }
525
526 #ifdef  CONFIG_COMPAT
527
528 asmlinkage long
529 sys32_vserver(uint32_t cmd, uint32_t id, void __user *data)
530 {
531         return do_vserver(cmd, id, data, 1);
532 }
533
534 #endif  /* CONFIG_COMPAT */