fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / kernel / vserver / switch.c
index bbf1aef..67b3870 100644 (file)
@@ -3,7 +3,7 @@
  *
  *  Virtual Server: Syscall Switch
  *
- *  Copyright (C) 2003-2005  Herbert Pötzl
+ *  Copyright (C) 2003-2007  Herbert Pötzl
  *
  *  V0.01  syscall switch
  *  V0.02  added signal to context
@@ -11,6 +11,8 @@
  *  V0.04  added iattr, task/xid functions
  *  V0.05  added debug/history stuff
  *  V0.06  added compat32 layer
+ *  V0.07  vcmd args and perms
+ *  V0.08  added status commands
  *
  */
 
 #include <linux/compat.h>
 #include <asm/errno.h>
 
-#include <linux/vserver/network.h>
+#include <linux/vs_context.h>
+#include <linux/vs_network.h>
 #include <linux/vserver/switch.h>
-#include <linux/vserver/debug.h>
 
+#include "vci_config.h"
 
 static inline
 int vc_get_version(uint32_t id)
@@ -34,8 +37,15 @@ int vc_get_version(uint32_t id)
        return VCI_VERSION;
 }
 
+static inline
+int vc_get_vci(uint32_t id)
+{
+       return vci_kernel_config();
+}
+
 #include <linux/vserver/context_cmd.h>
 #include <linux/vserver/cvirt_cmd.h>
+#include <linux/vserver/cacct_cmd.h>
 #include <linux/vserver/limit_cmd.h>
 #include <linux/vserver/network_cmd.h>
 #include <linux/vserver/sched_cmd.h>
@@ -43,7 +53,7 @@ int vc_get_version(uint32_t id)
 #include <linux/vserver/inode_cmd.h>
 #include <linux/vserver/dlimit_cmd.h>
 #include <linux/vserver/signal_cmd.h>
-#include <linux/vserver/namespace_cmd.h>
+#include <linux/vserver/space_cmd.h>
 
 #include <linux/vserver/legacy.h>
 #include <linux/vserver/inode.h>
@@ -60,118 +70,110 @@ int vc_get_version(uint32_t id)
 
 
 static inline
-long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat)
+long do_vcmd(uint32_t cmd, uint32_t id,
+       struct vx_info *vxi, struct nx_info *nxi,
+       void __user *data, int compat)
 {
-       vxdprintk(VXD_CBIT(switch, 0),
-               "vc: VCMD_%02d_%d[%d], %d,%p,%d",
-               VC_CATEGORY(cmd), VC_COMMAND(cmd),
-               VC_VERSION(cmd), id, data, compat);
-
-#ifdef CONFIG_VSERVER_LEGACY
-       if (!capable(CAP_CONTEXT) &&
-               /* dirty hack for capremove */
-               !(cmd==VCMD_new_s_context && id==-2))
-               return -EPERM;
-#else
-       if (!capable(CAP_CONTEXT))
-               return -EPERM;
-#endif
-
        switch (cmd) {
+
        case VCMD_get_version:
                return vc_get_version(id);
-
-       case VCMD_dump_history:
-#ifdef CONFIG_VSERVER_HISTORY
-               return vc_dump_history(id);
-#else
-               return -ENOSYS;
-#endif
-
-#ifdef CONFIG_VSERVER_LEGACY
-       case VCMD_new_s_context:
-               return vc_new_s_context(id, data);
-#endif
-#ifdef CONFIG_VSERVER_LEGACYNET
-       case VCMD_set_ipv4root:
-               return vc_set_ipv4root(id, data);
-#endif
+       case VCMD_get_vci:
+               return vc_get_vci(id);
 
        case VCMD_task_xid:
                return vc_task_xid(id, data);
        case VCMD_vx_info:
-               return vc_vx_info(id, data);
+               return vc_vx_info(vxi, data);
 
        case VCMD_task_nid:
                return vc_task_nid(id, data);
        case VCMD_nx_info:
-               return vc_nx_info(id, data);
-
-       case VCMD_set_namespace_v0:
-               return vc_set_namespace(-1, data);
-       case VCMD_set_namespace:
-               return vc_set_namespace(id, data);
-       }
+               return vc_nx_info(nxi, data);
 
-       /* those are allowed while in setup too */
-       if (!vx_check(0, VX_ADMIN|VX_WATCH) &&
-               !vx_flags(VXF_STATE_SETUP,0))
-               return -EPERM;
+       case VCMD_set_space_v0:
+       /* this is version 1 */
+       case VCMD_set_space:
+               return vc_set_space(vxi, data);
 
-#ifdef CONFIG_VSERVER_LEGACY
-       switch (cmd) {
-       case VCMD_set_cflags:
-       case VCMD_set_ccaps:
-               if (vx_check(0, VX_WATCH))
-                       return 0;
-       }
-#endif
+       case VCMD_get_space_mask:
+               return vc_get_space_mask(vxi, data);
 
-       switch (cmd) {
 #ifdef CONFIG_IA32_EMULATION
        case VCMD_get_rlimit:
-               return __COMPAT(vc_get_rlimit, id, data, compat);
+               return __COMPAT(vc_get_rlimit, vxi, data, compat);
        case VCMD_set_rlimit:
-               return __COMPAT(vc_set_rlimit, id, data, compat);
+               return __COMPAT(vc_set_rlimit, vxi, data, compat);
 #else
        case VCMD_get_rlimit:
-               return vc_get_rlimit(id, data);
+               return vc_get_rlimit(vxi, data);
        case VCMD_set_rlimit:
-               return vc_set_rlimit(id, data);
+               return vc_set_rlimit(vxi, data);
 #endif
        case VCMD_get_rlimit_mask:
                return vc_get_rlimit_mask(id, data);
+       case VCMD_reset_minmax:
+               return vc_reset_minmax(vxi, data);
 
        case VCMD_get_vhi_name:
-               return vc_get_vhi_name(id, data);
+               return vc_get_vhi_name(vxi, data);
        case VCMD_set_vhi_name:
-               return vc_set_vhi_name(id, data);
+               return vc_set_vhi_name(vxi, data);
+
+       case VCMD_ctx_stat:
+               return vc_ctx_stat(vxi, data);
+       case VCMD_virt_stat:
+               return vc_virt_stat(vxi, data);
+       case VCMD_sock_stat:
+               return vc_sock_stat(vxi, data);
+       case VCMD_rlimit_stat:
+               return vc_rlimit_stat(vxi, data);
 
        case VCMD_set_cflags:
-               return vc_set_cflags(id, data);
+               return vc_set_cflags(vxi, data);
        case VCMD_get_cflags:
-               return vc_get_cflags(id, data);
+               return vc_get_cflags(vxi, data);
 
+       case VCMD_set_ccaps_v0:
+               return vc_set_ccaps_v0(vxi, data);
+       /* this is version 1 */
        case VCMD_set_ccaps:
-               return vc_set_ccaps(id, data);
+               return vc_set_ccaps(vxi, data);
+       case VCMD_get_ccaps_v0:
+               return vc_get_ccaps_v0(vxi, data);
+       /* this is version 1 */
        case VCMD_get_ccaps:
-               return vc_get_ccaps(id, data);
+               return vc_get_ccaps(vxi, data);
+       case VCMD_set_bcaps:
+               return vc_set_bcaps(vxi, data);
+       case VCMD_get_bcaps:
+               return vc_get_bcaps(vxi, data);
 
        case VCMD_set_nflags:
-               return vc_set_nflags(id, data);
+               return vc_set_nflags(nxi, data);
        case VCMD_get_nflags:
-               return vc_get_nflags(id, data);
+               return vc_get_nflags(nxi, data);
 
        case VCMD_set_ncaps:
-               return vc_set_ncaps(id, data);
+               return vc_set_ncaps(nxi, data);
        case VCMD_get_ncaps:
-               return vc_get_ncaps(id, data);
+               return vc_get_ncaps(nxi, data);
 
+#ifdef CONFIG_VSERVER_LEGACY
        case VCMD_set_sched_v2:
-               return vc_set_sched_v2(id, data);
-       /* this is version 3 */
+               return vc_set_sched_v2(vxi, data);
+#endif
+       case VCMD_set_sched_v3:
+               return vc_set_sched_v3(vxi, data);
+       case VCMD_set_sched_v4:
+               return vc_set_sched_v4(vxi, data);
+       /* this is version 5 */
        case VCMD_set_sched:
-               return vc_set_sched(id, data);
+               return vc_set_sched(vxi, data);
+       case VCMD_get_sched:
+               return vc_get_sched(vxi, data);
+       case VCMD_sched_info:
+               return vc_sched_info(vxi, data);
 
        case VCMD_add_dlimit:
                return __COMPAT(vc_add_dlimit, id, data, compat);
@@ -181,24 +183,16 @@ long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat)
                return __COMPAT(vc_set_dlimit, id, data, compat);
        case VCMD_get_dlimit:
                return __COMPAT(vc_get_dlimit, id, data, compat);
-       }
 
-       /* below here only with VX_ADMIN */
-       if (!vx_check(0, VX_ADMIN|VX_WATCH))
-               return -EPERM;
-
-       switch (cmd) {
        case VCMD_ctx_kill:
-               return vc_ctx_kill(id, data);
+               return vc_ctx_kill(vxi, data);
 
        case VCMD_wait_exit:
-               return vc_wait_exit(id, data);
+               return vc_wait_exit(vxi, data);
 
-       case VCMD_create_context:
 #ifdef CONFIG_VSERVER_LEGACY
+       case VCMD_create_context:
                return vc_ctx_create(id, NULL);
-#else
-               return -ENOSYS;
 #endif
 
        case VCMD_get_iattr:
@@ -206,63 +200,328 @@ long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat)
        case VCMD_set_iattr:
                return __COMPAT(vc_set_iattr, id, data, compat);
 
-       case VCMD_enter_namespace:
-               return vc_enter_namespace(id, data);
+       case VCMD_enter_space_v0:
+               return vc_enter_space(vxi, NULL);
+       /* this is version 1 */
+       case VCMD_enter_space:
+               return vc_enter_space(vxi, data);
 
        case VCMD_ctx_create_v0:
-#ifdef CONFIG_VSERVER_LEGACY
-               if (id == 1) {
-                       current->xid = 1;
-                       return 1;
-               }
-#endif
                return vc_ctx_create(id, NULL);
        case VCMD_ctx_create:
                return vc_ctx_create(id, data);
        case VCMD_ctx_migrate_v0:
-               return vc_ctx_migrate(id, NULL);
+               return vc_ctx_migrate(vxi, NULL);
        case VCMD_ctx_migrate:
-               return vc_ctx_migrate(id, data);
+               return vc_ctx_migrate(vxi, data);
 
        case VCMD_net_create_v0:
                return vc_net_create(id, NULL);
        case VCMD_net_create:
                return vc_net_create(id, data);
        case VCMD_net_migrate:
-               return vc_net_migrate(id, data);
+               return vc_net_migrate(nxi, data);
        case VCMD_net_add:
-               return vc_net_add(id, data);
+               return vc_net_add(nxi, data);
        case VCMD_net_remove:
-               return vc_net_remove(id, data);
+               return vc_net_remove(nxi, data);
 
+#ifdef CONFIG_VSERVER_HISTORY
+       case VCMD_dump_history:
+               return vc_dump_history(id);
+       case VCMD_read_history:
+               return __COMPAT(vc_read_history, id, data, compat);
+#endif
+#ifdef CONFIG_VSERVER_MONITOR
+       case VCMD_read_monitor:
+               return __COMPAT(vc_read_monitor, id, data, compat);
+#endif
+#ifdef CONFIG_VSERVER_LEGACY
+       case VCMD_new_s_context:
+               return vc_new_s_context(id, data);
+#endif
+#ifdef CONFIG_VSERVER_LEGACYNET
+       case VCMD_set_ipv4root:
+               return vc_set_ipv4root(id, data);
+#endif
+       default:
+               vxwprintk(1, "unimplemented VCMD_%02d_%d[%d]",
+                       VC_CATEGORY(cmd), VC_COMMAND(cmd), VC_VERSION(cmd));
        }
        return -ENOSYS;
 }
 
-asmlinkage long
-sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
+
+#define        __VCMD(vcmd, _perm, _args, _flags)              \
+       case VCMD_ ## vcmd: perm = _perm;               \
+               args = _args; flags = _flags; break
+
+
+#define VCA_NONE       0x00
+#define VCA_VXI                0x01
+#define VCA_NXI                0x02
+
+#define VCF_NONE       0x00
+#define VCF_INFO       0x01
+#define VCF_ADMIN      0x02
+#define VCF_ARES       0x06    /* includes admin */
+#define VCF_SETUP      0x08
+
+#define VCF_ZIDOK      0x10    /* zero id okay */
+
+
+static inline
+long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat)
 {
-       long ret = do_vserver(cmd, id, data, 0);
+       long ret;
+       int permit = -1, state = 0;
+       int perm = -1, args = 0, flags = 0;
+       struct vx_info *vxi = NULL;
+       struct nx_info *nxi = NULL;
+
+       switch (cmd) {
+       /* unprivileged commands */
+       __VCMD(get_version,      0, VCA_NONE,   0);
+       __VCMD(get_vci,          0, VCA_NONE,   0);
+       __VCMD(get_rlimit_mask,  0, VCA_NONE,   0);
+       __VCMD(get_space_mask,   0, VCA_NONE,   0);
+
+       /* info commands */
+       __VCMD(task_xid,         2, VCA_NONE,   0);
+       __VCMD(reset_minmax,     2, VCA_VXI,    0);
+       __VCMD(vx_info,          3, VCA_VXI,    VCF_INFO);
+       __VCMD(get_bcaps,        3, VCA_VXI,    VCF_INFO);
+       __VCMD(get_ccaps_v0,     3, VCA_VXI,    VCF_INFO);
+       __VCMD(get_ccaps,        3, VCA_VXI,    VCF_INFO);
+       __VCMD(get_cflags,       3, VCA_VXI,    VCF_INFO);
+       __VCMD(get_vhi_name,     3, VCA_VXI,    VCF_INFO);
+       __VCMD(get_rlimit,       3, VCA_VXI,    VCF_INFO);
+
+       __VCMD(ctx_stat,         3, VCA_VXI,    VCF_INFO);
+       __VCMD(virt_stat,        3, VCA_VXI,    VCF_INFO);
+       __VCMD(sock_stat,        3, VCA_VXI,    VCF_INFO);
+       __VCMD(rlimit_stat,      3, VCA_VXI,    VCF_INFO);
+
+       __VCMD(task_nid,         2, VCA_NONE,   0);
+       __VCMD(nx_info,          3, VCA_NXI,    VCF_INFO);
+       __VCMD(get_ncaps,        3, VCA_NXI,    VCF_INFO);
+       __VCMD(get_nflags,       3, VCA_NXI,    VCF_INFO);
+
+       __VCMD(get_iattr,        2, VCA_NONE,   0);
+       __VCMD(get_dlimit,       3, VCA_NONE,   VCF_INFO);
+       __VCMD(get_sched,        3, VCA_VXI,    VCF_INFO);
+       __VCMD(sched_info,       3, VCA_VXI,    VCF_INFO|VCF_ZIDOK);
+
+       /* lower admin commands */
+       __VCMD(wait_exit,        4, VCA_VXI,    VCF_INFO);
+       __VCMD(ctx_create_v0,    5, VCA_NONE,   0);
+       __VCMD(ctx_create,       5, VCA_NONE,   0);
+       __VCMD(ctx_migrate_v0,   5, VCA_VXI,    VCF_ADMIN);
+       __VCMD(ctx_migrate,      5, VCA_VXI,    VCF_ADMIN);
+       __VCMD(enter_space_v0,   5, VCA_VXI,    VCF_ADMIN);
+       __VCMD(enter_space,      5, VCA_VXI,    VCF_ADMIN);
+
+       __VCMD(net_create_v0,    5, VCA_NONE,   0);
+       __VCMD(net_create,       5, VCA_NONE,   0);
+       __VCMD(net_migrate,      5, VCA_NXI,    VCF_ADMIN);
+
+       /* higher admin commands */
+       __VCMD(ctx_kill,         6, VCA_VXI,    VCF_ARES);
+       __VCMD(set_space_v0,     7, VCA_VXI,    VCF_ARES|VCF_SETUP);
+       __VCMD(set_space,        7, VCA_VXI,    VCF_ARES|VCF_SETUP);
+
+       __VCMD(set_ccaps_v0,     7, VCA_VXI,    VCF_ARES|VCF_SETUP);
+       __VCMD(set_ccaps,        7, VCA_VXI,    VCF_ARES|VCF_SETUP);
+       __VCMD(set_bcaps,        7, VCA_VXI,    VCF_ARES|VCF_SETUP);
+       __VCMD(set_cflags,       7, VCA_VXI,    VCF_ARES|VCF_SETUP);
+
+       __VCMD(set_vhi_name,     7, VCA_VXI,    VCF_ARES|VCF_SETUP);
+       __VCMD(set_rlimit,       7, VCA_VXI,    VCF_ARES|VCF_SETUP);
+       __VCMD(set_sched,        7, VCA_VXI,    VCF_ARES|VCF_SETUP);
+       __VCMD(set_sched_v2,     7, VCA_VXI,    VCF_ARES|VCF_SETUP);
+       __VCMD(set_sched_v3,     7, VCA_VXI,    VCF_ARES|VCF_SETUP);
+       __VCMD(set_sched_v4,     7, VCA_VXI,    VCF_ARES|VCF_SETUP);
+
+       __VCMD(set_ncaps,        7, VCA_NXI,    VCF_ARES|VCF_SETUP);
+       __VCMD(set_nflags,       7, VCA_NXI,    VCF_ARES|VCF_SETUP);
+       __VCMD(net_add,          8, VCA_NXI,    VCF_ARES|VCF_SETUP);
+       __VCMD(net_remove,       8, VCA_NXI,    VCF_ARES|VCF_SETUP);
+
+       __VCMD(set_iattr,        7, VCA_NONE,   0);
+       __VCMD(set_dlimit,       7, VCA_NONE,   VCF_ARES);
+       __VCMD(add_dlimit,       8, VCA_NONE,   VCF_ARES);
+       __VCMD(rem_dlimit,       8, VCA_NONE,   VCF_ARES);
+
+       /* debug level admin commands */
+#ifdef CONFIG_VSERVER_HISTORY
+       __VCMD(dump_history,     9, VCA_NONE,   0);
+       __VCMD(read_history,     9, VCA_NONE,   0);
+#endif
+#ifdef CONFIG_VSERVER_MONITOR
+       __VCMD(read_monitor,     9, VCA_NONE,   0);
+#endif
+
+       /* legacy commands */
+#ifdef CONFIG_VSERVER_LEGACY
+       __VCMD(new_s_context,    1, VCA_NONE,   0);
+       __VCMD(create_context,   5, VCA_NONE,   0);
+#endif
+#ifdef CONFIG_VSERVER_LEGACYNET
+       __VCMD(set_ipv4root,     5, VCA_NONE,   0);
+#endif
+       default:
+               perm = -1;
+       }
+
+       vxdprintk(VXD_CBIT(switch, 0),
+               "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]",
+               VC_CATEGORY(cmd), VC_COMMAND(cmd),
+               VC_VERSION(cmd), id, data, compat,
+               perm, args, flags);
+
+       ret = -ENOSYS;
+       if (perm < 0)
+               goto out;
+
+       state = 1;
+#ifdef CONFIG_VSERVER_LEGACY
+       if (!capable(CAP_CONTEXT) &&
+               /* dirty hack for capremove */
+               !(cmd==VCMD_new_s_context && id==-2))
+               goto out;
+#else
+       if (!capable(CAP_CONTEXT))
+               goto out;
+#endif
 
+       state = 2;
+       /* moved here from the individual commands */
+       ret = -EPERM;
+       if ((perm > 1) && !capable(CAP_SYS_ADMIN))
+               goto out;
+
+       state = 3;
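+       /* vcmd involves resource management; NOTE(review): VCF_ARES (0x06) contains the VCF_ADMIN bit (0x02), so this mask test also matches vcmds flagged only VCF_ADMIN -- confirm this is intended */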
+       ret = -EPERM;
+       if ((flags & VCF_ARES) && !capable(CAP_SYS_RESOURCE))
+               goto out;
+
+       state = 4;
+       /* various legacy exceptions */
+       switch (cmd) {
+#ifdef CONFIG_VSERVER_LEGACY
+       case VCMD_set_cflags:
+       case VCMD_set_ccaps_v0:
+               ret = 0;
+               if (vx_check(0, VS_WATCH))
+                       goto out;
+               break;
+
+       case VCMD_ctx_create_v0:
+#endif
+       /* will go away when spectator is a cap */
+       case VCMD_ctx_migrate_v0:
+       case VCMD_ctx_migrate:
+               if (id == 1) {
+                       current->xid = 1;
+                       ret = 1;
+                       goto out;
+               }
+               break;
+
+       /* will go away when spectator is a cap */
+       case VCMD_net_migrate:
+               if (id == 1) {
+                       current->nid = 1;
+                       ret = 1;
+                       goto out;
+               }
+               break;
+
+       /* legacy special casing */
+       case VCMD_set_space_v0:
+               id = -1;
+               break;
+       }
+
+       /* vcmds are fine by default */
+       permit = 1;
+
+       /* admin type vcmds require admin ... */
+       if (flags & VCF_ADMIN)
+               permit = vx_check(0, VS_ADMIN) ? 1 : 0;
+
+       /* ... but setup type vcmds override that */
+       if (!permit && (flags & VCF_SETUP))
+               permit = vx_flags(VXF_STATE_SETUP, 0) ? 2 : 0;
+
+       state = 5;
+       ret = -EPERM;
+       if (!permit)
+               goto out;
+
+       state = 6;
+       if (!id && (flags & VCF_ZIDOK))
+               goto skip_id;
+
+       ret = -ESRCH;
+       if (args & VCA_VXI) {
+               vxi = lookup_vx_info(id);
+               if (!vxi)
+                       goto out;
+
+               if ((flags & VCF_ADMIN) &&
+                       /* special case kill for shutdown */
+                       (cmd != VCMD_ctx_kill) &&
+                       /* can context be administered? */
+                       !vx_info_flags(vxi, VXF_STATE_ADMIN, 0)) {
+                       ret = -EACCES;
+                       goto out_vxi;
+               }
+       }
+       state = 7;
+       if (args & VCA_NXI) {
+               nxi = lookup_nx_info(id);
+               if (!nxi)
+                       goto out_vxi;
+
+               if ((flags & VCF_ADMIN) &&
+                       /* can context be administered? */
+                       !nx_info_flags(nxi, NXF_STATE_ADMIN, 0)) {
+                       ret = -EACCES;
+                       goto out_nxi;
+               }
+       }
+skip_id:
+       state = 8;
+       ret = do_vcmd(cmd, id, vxi, nxi, data, compat);
+
+out_nxi:
+       if ((args & VCA_NXI) && nxi)
+               put_nx_info(nxi);
+out_vxi:
+       if ((args & VCA_VXI) && vxi)
+               put_vx_info(vxi);
+out:
        vxdprintk(VXD_CBIT(switch, 1),
-               "vc: VCMD_%02d_%d[%d] = %08lx(%ld)",
+               "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]",
                VC_CATEGORY(cmd), VC_COMMAND(cmd),
-               VC_VERSION(cmd), ret, ret);
+               VC_VERSION(cmd), ret, ret, state, permit);
        return ret;
 }
 
+asmlinkage long
+sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
+{
+       return do_vserver(cmd, id, data, 0);
+}
+
 #ifdef CONFIG_COMPAT
 
 asmlinkage long
 sys32_vserver(uint32_t cmd, uint32_t id, void __user *data)
 {
-       long ret = do_vserver(cmd, id, data, 1);
-
-       vxdprintk(VXD_CBIT(switch, 1),
-               "vc: VCMD_%02d_%d[%d] = %08lx(%ld)",
-               VC_CATEGORY(cmd), VC_COMMAND(cmd),
-               VC_VERSION(cmd), ret, ret);
-       return ret;
+       return do_vserver(cmd, id, data, 1);
 }
 
 #endif /* CONFIG_COMPAT */