/* Copyright 2005 Princeton University Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PRINCETON UNIVERSITY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "config.h" #include "vserver.h" static inline PyObject *inc_and_ret_none(void) { Py_INCREF(Py_None); return Py_None; } #define NONE inc_and_ret_none() #define PL_INSECURE_BCAPS (vc_get_insecurebcaps() | (1 << VC_CAP_NET_BIND_SERVICE)) #define PL_INSECURE_CCAPS vc_get_insecureccaps() static PyObject * __vserver_get_rlimit(xid_t xid, int resource) { struct vc_rlimit limits; PyObject *ret; errno = 0; if (vc_get_rlimit(xid, resource, &limits)==-1) ret = PyErr_SetFromErrno(PyExc_OSError); else ret = Py_BuildValue("LLL",limits.hard, limits.soft, limits.min); return ret; } static PyObject * vserver_get_rlimit(PyObject *self, PyObject *args) { xid_t xid; int resource; PyObject *ret; if (!PyArg_ParseTuple(args, "Ii", &xid, &resource)) ret = NULL; else ret = __vserver_get_rlimit(xid, resource); return ret; } static PyObject * vserver_set_rlimit(PyObject *self, PyObject *args) { struct vc_rlimit limits; struct vc_rlimit_mask mask; uint32_t bitmask; xid_t xid; int resource; PyObject *ret = NULL; limits.min = VC_LIM_KEEP; limits.soft = VC_LIM_KEEP; limits.hard = VC_LIM_KEEP; if (!PyArg_ParseTuple(args, "IiLLL", &xid, &resource, &limits.hard, &limits.soft, &limits.min)) return NULL; errno = 0; if (vc_get_rlimit_mask(xid, &mask)==-1) { ret = PyErr_SetFromErrno(PyExc_OSError); } else { bitmask = (1<=0) { res = Py_BuildValue("(i,i,i,i,i)", data.space_used, data.space_total, data.inodes_used, data.inodes_total, data.reserved); } else { res = PyErr_SetFromErrno(PyExc_OSError); } return res; } static PyObject * vserver_set_dlimit(PyObject *self, PyObject *args) { char* path; unsigned xid; struct vc_ctx_dlimit data; memset(&data,0,sizeof(data)); if (!PyArg_ParseTuple(args, "siiiiii", &path, &xid, &data.space_used, &data.space_total, &data.inodes_used, &data.inodes_total, &data.reserved)) return NULL; if ((vc_add_dlimit(path, xid, 0) && errno != EEXIST) || vc_set_dlimit(path, xid, 0, &data)) return PyErr_SetFromErrno(PyExc_OSError); return NONE; } static PyObject * vserver_unset_dlimit(PyObject *self, PyObject *args) { char *path; unsigned xid; if (!PyArg_ParseTuple(args, "si", &path, &xid)) return NULL; if (vc_rem_dlimit(path, xid, 0) && errno != ESRCH) return PyErr_SetFromErrno(PyExc_OSError); return NONE; } static PyObject * vserver_set_bcaps(PyObject *self, PyObject *args) { xid_t ctx; struct vc_ctx_caps caps; if (!PyArg_ParseTuple(args, "IK", &ctx, &caps.bcaps)) return NULL; caps.bmask = PL_INSECURE_BCAPS; caps.cmask = caps.ccaps = 0; if (vc_set_ccaps(ctx, &caps) == -1 && errno != ESRCH) return PyErr_SetFromErrno(PyExc_OSError); return NONE; } static PyObject * vserver_text2bcaps(PyObject *self, PyObject *args) { struct vc_ctx_caps caps = { .bcaps = 0 }; const char *list; int len; struct vc_err_listparser err; if (!PyArg_ParseTuple(args, "s#", &list, &len)) return NULL; vc_list2bcap(list, len, &err, &caps); return Py_BuildValue("K", caps.bcaps); } static PyObject * vserver_get_bcaps(PyObject *self, PyObject *args) { xid_t ctx; struct vc_ctx_caps caps; if (!PyArg_ParseTuple(args, "I", &ctx)) return NULL; if (vc_get_ccaps(ctx, &caps) == -1) { if (errno != ESRCH) return PyErr_SetFromErrno(PyExc_OSError); else caps.bcaps = 0; } return Py_BuildValue("K", caps.bcaps & PL_INSECURE_BCAPS); } static PyObject * vserver_bcaps2text(PyObject *self, PyObject *args) { struct vc_ctx_caps caps = { .bcaps = 0 }; PyObject *list; const char *cap; if (!PyArg_ParseTuple(args, "K", &caps.bcaps)) return NULL; list = PyString_FromString(""); while ((cap = vc_lobcap2text(&caps.bcaps)) != NULL) { if (list == NULL) break; PyString_ConcatAndDel(&list, PyString_FromFormat( (PyString_Size(list) > 0 ? ",CAP_%s" : "CAP_%s" ), cap)); } return list; } static PyObject * vserver_set_ccaps(PyObject *self, PyObject *args) { xid_t ctx; struct vc_ctx_caps caps; if (!PyArg_ParseTuple(args, "IK", &ctx, &caps.ccaps)) return NULL; caps.cmask = PL_INSECURE_CCAPS; caps.bmask = caps.bcaps = 0; if (vc_set_ccaps(ctx, &caps) == -1 && errno != ESRCH) return PyErr_SetFromErrno(PyExc_OSError); return NONE; } static PyObject * vserver_text2ccaps(PyObject *self, PyObject *args) { struct vc_ctx_caps caps = { .ccaps = 0 }; const char *list; int len; struct vc_err_listparser err; if (!PyArg_ParseTuple(args, "s#", &list, &len)) return NULL; vc_list2ccap(list, len, &err, &caps); return Py_BuildValue("K", caps.ccaps); } static PyObject * vserver_get_ccaps(PyObject *self, PyObject *args) { xid_t ctx; struct vc_ctx_caps caps; if (!PyArg_ParseTuple(args, "I", &ctx)) return NULL; if (vc_get_ccaps(ctx, &caps) == -1) { if (errno != ESRCH) return PyErr_SetFromErrno(PyExc_OSError); else caps.ccaps = 0; } return Py_BuildValue("K", caps.ccaps & PL_INSECURE_CCAPS); } static PyObject * vserver_ccaps2text(PyObject *self, PyObject *args) { struct vc_ctx_caps caps = { .ccaps = 0 }; PyObject *list; const char *cap; if (!PyArg_ParseTuple(args, "K", &caps.ccaps)) return NULL; list = PyString_FromString(""); while ((cap = vc_loccap2text(&caps.ccaps)) != NULL) { if (list == NULL) break; PyString_ConcatAndDel(&list, PyString_FromFormat( (PyString_Size(list) > 0 ? ",%s" : "%s" ), cap)); } return list; } static inline int convert_address(const char *str, struct vc_net_addr *addr) { void *dst; if (inet_pton(AF_INET6, str, addr->vna_v6_ip.s6_addr) > 0) { addr->vna_type = VC_NXA_TYPE_IPV6; return 0; } else if (inet_pton(AF_INET, str, &addr->vna_v4_ip.s_addr) > 0) { addr->vna_type = VC_NXA_TYPE_IPV4; return 0; } return -1; } static int mask_to_prefix(void *data, int limit) { uint8_t *mask = data; int prefix; for (prefix = 0; prefix < limit && mask[prefix >> 3] & (1 << (prefix & 0x07)); prefix++) ; return prefix; } static int get_mask(struct vc_net_addr *addr) { struct ifaddrs *head, *ifa; int ret = 0; int family, offset, len; void *ip; switch (addr->vna_type) { case VC_NXA_TYPE_IPV4: family = AF_INET; offset = offsetof(struct sockaddr_in, sin_addr.s_addr); ip = &addr->vna_v4_ip.s_addr; len = 4; addr->vna_v4_mask.s_addr = htonl(0xffffff00); addr->vna_prefix = 24; break; case VC_NXA_TYPE_IPV6: family = AF_INET6; offset = offsetof(struct sockaddr_in6, sin6_addr.s6_addr); ip = addr->vna_v6_ip.s6_addr; len = 16; addr->vna_v6_mask.s6_addr32[0] = addr->vna_v6_mask.s6_addr32[1] = 0xffffffff; addr->vna_v6_mask.s6_addr32[2] = addr->vna_v6_mask.s6_addr32[3] = 0x00000000; addr->vna_prefix = 64; break; default: errno = EINVAL; return -1; } if (getifaddrs(&head) == -1) return -1; for (ifa = head; ifa; ifa = ifa->ifa_next) { if (ifa->ifa_addr && ifa->ifa_addr->sa_family == family && memcmp((char *) ifa->ifa_addr + offset, ip, len) == 0) { switch (addr->vna_type) { case VC_NXA_TYPE_IPV4: memcpy(&addr->vna_v4_mask.s_addr, ifa->ifa_netmask + offset, len); addr->vna_prefix = mask_to_prefix(&addr->vna_v4_mask.s_addr, 32); break; case VC_NXA_TYPE_IPV6: memcpy(addr->vna_v6_mask.s6_addr, ifa->ifa_netmask + offset, len); addr->vna_prefix = mask_to_prefix(addr->vna_v6_mask.s6_addr, 128); break; } ret = 1; break; } } freeifaddrs(head); return ret; } /* XXX These two functions are really similar */ static PyObject * vserver_net_add(PyObject *self, PyObject *args) { struct vc_net_addr addr = { .vna_type = 0 }; nid_t nid; const char *ip; if (!PyArg_ParseTuple(args, "Is", &nid, &ip)) return NULL; /* Optimize standard case, which also needs to be handled differently */ if (strcmp(ip, "0.0.0.0") == 0) { addr.vna_type = VC_NXA_TYPE_MASK | VC_NXA_TYPE_IPV4; addr.vna_flags = 0; addr.vna_prefix = 0; addr.vna_parent = 0; addr.vna_v4_mask.s_addr = 0; addr.vna_v4_ip.s_addr = 0; } else { if (convert_address(ip, &addr) == -1) return PyErr_Format(PyExc_ValueError, "%s is not a valid IP address", ip); switch (get_mask(&addr)) { case -1: return PyErr_SetFromErrno(PyExc_OSError); case 0: /* XXX error here? */ break; } addr.vna_type |= VC_NXA_TYPE_ADDR; } if (vc_net_add(nid, &addr) == -1 && errno != ESRCH) return PyErr_SetFromErrno(PyExc_OSError); return NONE; } static PyObject * vserver_net_remove(PyObject *self, PyObject *args) { struct vc_net_addr addr; nid_t nid; const char *ip; if (!PyArg_ParseTuple(args, "Is", &nid, &ip)) return NULL; if (strcmp(ip, "all") == 0) addr.vna_type = VC_NXA_TYPE_ANY; else if (strcmp(ip, "all4") == 0) addr.vna_type = VC_NXA_TYPE_IPV4 | VC_NXA_TYPE_ANY; else if (strcmp(ip, "all6") == 0) addr.vna_type = VC_NXA_TYPE_IPV6 | VC_NXA_TYPE_ANY; else { if (convert_address(ip, &addr) == -1) return PyErr_Format(PyExc_ValueError, "%s is not a valid IP address", ip); switch (get_mask(&addr)) { case -1: return PyErr_SetFromErrno(PyExc_OSError); } addr.vna_type |= VC_NXA_TYPE_ADDR; } if (vc_net_remove(nid, &addr) == -1 && errno != ESRCH) return PyErr_SetFromErrno(PyExc_OSError); return NONE; } struct secure_dirs { int host_fd; int cwd_fd; int guest_fd; int target_fd; }; static inline int fchroot(int fd) { if (fchdir(fd) == -1 || chroot(".") == -1) return -1; return 0; } static inline int restore_dirs(struct secure_dirs *dirs) { if (dirs->host_fd != -1) { if (fchroot(dirs->host_fd) == -1) return -1; if (close(dirs->host_fd) == -1) return -1; } if (dirs->guest_fd != -1) { if (close(dirs->guest_fd) == -1) return -1; } if (dirs->target_fd != -1) { if (close(dirs->target_fd) == -1) return -1; } if (dirs->cwd_fd != -1) { if (fchdir(dirs->cwd_fd) == -1) return -1; if (close(dirs->cwd_fd) == -1) return -1; } return 0; } static inline int secure_chdir(struct secure_dirs *dirs, const char *guest, const char *target) { dirs->host_fd = dirs->cwd_fd = dirs->guest_fd = dirs->target_fd = -1; dirs->host_fd = open("/", O_RDONLY|O_DIRECTORY); if (dirs->host_fd == -1) return -1; dirs->cwd_fd = open(".", O_RDONLY|O_DIRECTORY); if (dirs->cwd_fd == -1) return -1; dirs->guest_fd = open(guest, O_RDONLY|O_DIRECTORY); if (dirs->guest_fd == -1) return -1; if (fchroot(dirs->guest_fd) == -1) return -1; dirs->target_fd = open(target, O_RDONLY|O_DIRECTORY); if (dirs->target_fd == -1) return -1; if (fchroot(dirs->host_fd) == -1 || close(dirs->host_fd) == -1) return -1; dirs->host_fd = -1; if (close(dirs->guest_fd) == -1) return -1; dirs->guest_fd = -1; if (fchdir(dirs->target_fd) == -1 || close(dirs->target_fd) == -1) return -1; return 0; } static PyObject * vserver_mount(PyObject *self, PyObject *args) { const char *guest, *target, *source, *type, *data = NULL; unsigned long flags = 0; struct secure_dirs dirs; if (!PyArg_ParseTuple(args, "ssss|ks", &source, &guest, &target, &type, &flags, &data)) return NULL; if (secure_chdir(&dirs, guest, target) == -1) goto out; if (mount(source, ".", type, flags, data) == -1 && errno != EBUSY) goto out; restore_dirs(&dirs); return NONE; out: restore_dirs(&dirs); return PyErr_SetFromErrno(PyExc_OSError); } static PyObject * vserver_umount(PyObject *self, PyObject *args) { const char *guest, *target; int flags = 0; char *path; PyObject *ret; if (!PyArg_ParseTuple(args, "ss|i", &guest, &target, &flags)) return NULL; path = calloc(strlen(guest) + strlen(target) + 2, sizeof(char)); sprintf(path, "%s/%s", guest, target); if (umount2(path, flags) == -1) ret = PyErr_SetFromErrno(PyExc_OSError); else ret = NONE; free(path); return ret; } static PyObject * vserver_set_runlevel(PyObject *self, PyObject *args) { const char *file; int runlevel; struct utmp ut; if (!PyArg_ParseTuple(args, "si", &file, &runlevel)) return NULL; utmpname(file); setutent(); memset(&ut, 0, sizeof(ut)); ut.ut_type = RUN_LVL; ut.ut_pid = ('#' << 8) + runlevel + '0'; pututline(&ut); endutent(); return NONE; } static PyObject * vserver_set_name(PyObject *self, PyObject *args) { xid_t ctx, slice_id; PyObject *ret; if (!PyArg_ParseTuple(args, "II", &ctx, &slice_id)) return NULL; if (vc_set_vhi_name(ctx, vcVHI_CONTEXT, (char *)&slice_id, sizeof(slice_id)) != 0 && errno != ESRCH) { return PyErr_SetFromErrno(PyExc_OSError); } else { return NONE; } } static PyObject * vserver_get_name(PyObject *self, PyObject *args) { xid_t ctx, slice_id; PyObject *ret; if (!PyArg_ParseTuple(args, "I", &ctx)) return NULL; if (vc_get_vhi_name(ctx, vcVHI_CONTEXT, (char *)&slice_id, sizeof(slice_id)) != 0) { ret = PyErr_SetFromErrno(PyExc_OSError); } else { ret = Py_BuildValue("i", slice_id); } return ret; } static PyMethodDef methods[] = { { "setdlimit", vserver_set_dlimit, METH_VARARGS, "Set disk limits for given vserver context" }, { "unsetdlimit", vserver_unset_dlimit, METH_VARARGS, "Remove disk limits for given vserver context" }, { "getdlimit", vserver_get_dlimit, METH_VARARGS, "Get disk limits for given vserver context" }, { "setrlimit", vserver_set_rlimit, METH_VARARGS, "Set resource limits for given resource of a vserver context" }, { "getrlimit", vserver_get_rlimit, METH_VARARGS, "Get resource limits for given resource of a vserver context" }, { "setbcaps", vserver_set_bcaps, METH_VARARGS, "Set POSIX capabilities of a vserver context" }, { "getbcaps", vserver_get_bcaps, METH_VARARGS, "Get POSIX capabilities of a vserver context" }, { "text2bcaps", vserver_text2bcaps, METH_VARARGS, "Translate a string of capabilities to a bitmap" }, { "bcaps2text", vserver_bcaps2text, METH_VARARGS, "Translate a capability-bitmap into a string" }, { "setccaps", vserver_set_ccaps, METH_VARARGS, "Set context capabilities of a vserver context" }, { "getccaps", vserver_get_ccaps, METH_VARARGS, "Get context capabilities of a vserver context" }, { "text2ccaps", vserver_text2ccaps, METH_VARARGS, "Translate a string of context capabilities to a bitmap" }, { "ccaps2text", vserver_ccaps2text, METH_VARARGS, "Translate a context-capability-bitmap into a string" }, { "netadd", vserver_net_add, METH_VARARGS, "Assign an IP address to a context" }, { "netremove", vserver_net_remove, METH_VARARGS, "Remove IP address(es) from a context" }, { "mount", vserver_mount, METH_VARARGS, "Perform the mount() system call" }, { "umount", vserver_umount, METH_VARARGS, "Perform the umount2() system call" }, { "setrunlevel", vserver_set_runlevel, METH_VARARGS, "Set the runlevel in utmp" }, { "setname", vserver_set_name, METH_VARARGS, "Set the vcVHI_CONTEXT for a xid." }, { "getname", vserver_get_name, METH_VARARGS, "Get the vcVHI_CONTEXT for a xid." }, { NULL, NULL, 0, NULL } }; PyMODINIT_FUNC initvserverimpl(void) { PyObject *mod; mod = Py_InitModule("vserverimpl", methods); /* export the set of 'safe' capabilities */ PyModule_AddIntConstant(mod, "CAP_SAFE", ~vc_get_insecurebcaps()); /* export the default vserver directory */ PyModule_AddStringConstant(mod, "VSERVER_BASEDIR", DEFAULT_VSERVERDIR); /* export limit-related constants */ PyModule_AddIntConstant(mod, "DLIMIT_KEEP", (int)VC_CDLIM_KEEP); PyModule_AddIntConstant(mod, "DLIMIT_INF", (int)VC_CDLIM_INFINITY); PyModule_AddIntConstant(mod, "VC_LIM_INFINITY", (int)VC_LIM_INFINITY); PyModule_AddIntConstant(mod, "VC_LIM_KEEP", (int)VC_LIM_KEEP); PyModule_AddIntConstant(mod, "RLIMIT_CPU", (int)RLIMIT_CPU); PyModule_AddIntConstant(mod, "RLIMIT_RSS", (int)RLIMIT_RSS); PyModule_AddIntConstant(mod, "RLIMIT_NPROC", (int)RLIMIT_NPROC); PyModule_AddIntConstant(mod, "RLIMIT_NOFILE", (int)RLIMIT_NOFILE); PyModule_AddIntConstant(mod, "RLIMIT_MEMLOCK", (int)RLIMIT_MEMLOCK); PyModule_AddIntConstant(mod, "RLIMIT_AS", (int)RLIMIT_AS); PyModule_AddIntConstant(mod, "RLIMIT_LOCKS", (int)RLIMIT_LOCKS); PyModule_AddIntConstant(mod, "RLIMIT_SIGPENDING", (int)RLIMIT_SIGPENDING); PyModule_AddIntConstant(mod, "RLIMIT_MSGQUEUE", (int)RLIMIT_MSGQUEUE); PyModule_AddIntConstant(mod, "VLIMIT_NSOCK", (int)VC_VLIMIT_NSOCK); PyModule_AddIntConstant(mod, "VLIMIT_OPENFD", (int)VC_VLIMIT_OPENFD); PyModule_AddIntConstant(mod, "VLIMIT_ANON", (int)VC_VLIMIT_ANON); PyModule_AddIntConstant(mod, "VLIMIT_SHMEM", (int)VC_VLIMIT_SHMEM); }