1 /* Copyright 2005 Princeton University
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above
11 copyright notice, this list of conditions and the following
12 disclaimer in the documentation and/or other materials provided
13 with the distribution.
15 * Neither the name of the copyright holder nor the names of its
16 contributors may be used to endorse or promote products derived
17 from this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PRINCETON
23 UNIVERSITY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
26 OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
29 WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
38 #include <sys/resource.h>
39 #include <sys/types.h>
42 #include <sys/socket.h>
43 #include <arpa/inet.h>
47 #include <sys/mount.h>
52 #include "planetlab.h"
/* Helper invoked through the NONE macro.  Its body is not part of this
 * excerpt; judging by the name it presumably takes a reference on Py_None
 * and returns it -- TODO confirm against the full file. */
54 static inline PyObject *inc_and_ret_none(void)
/* NONE: shorthand for a call to inc_and_ret_none(). */
60 #define NONE inc_and_ret_none()
/* PL_INSECURE_BCAPS: the set reported by vc_get_insecurebcaps() with the
 * VC_CAP_NET_BIND_SERVICE bit added.  Used below both as the mask of bits a
 * caller may influence and as the filter applied when reporting bcaps. */
62 #define PL_INSECURE_BCAPS (vc_get_insecurebcaps() | (1 << VC_CAP_NET_BIND_SERVICE))
/*
 * chcontext(ctx[, bcaps]) -> bool
 *
 * Parses an unsigned context id and an optional 64-bit capability mask
 * ("I|K") and hands both to pl_chcontext().  A negative result is raised as
 * OSError built from errno; any other result is returned as a Python bool.
 */
68 vserver_chcontext(PyObject *self, PyObject *args)
72 uint_least64_t bcaps = 0;
74 if (!PyArg_ParseTuple(args, "I|K", &ctx, &bcaps))
/* Every bit outside PL_INSECURE_BCAPS is forced on, so the caller's value
 * only decides which bits of the insecure set are included. */
76 bcaps |= ~PL_INSECURE_BCAPS;
78 if ((ctx_is_new = pl_chcontext(ctx, bcaps, 0)) < 0)
79 return PyErr_SetFromErrno(PyExc_OSError);
81 return PyBool_FromLong(ctx_is_new);
/*
 * setup_done(ctx)
 *
 * Parses an unsigned context id ("I") and calls pl_setup_done() on it.
 * A negative result is raised as OSError built from errno.  The success
 * return is not visible in this excerpt.
 */
85 vserver_setup_done(PyObject *self, PyObject *args)
89 if (!PyArg_ParseTuple(args, "I", &ctx))
92 if (pl_setup_done(ctx) < 0)
93 return PyErr_SetFromErrno(PyExc_OSError);
/*
 * isrunning(ctx) -> bool
 *
 * Builds the path /proc/virtual/<ctx> and reports whether stat() on it
 * succeeds (True) or not (False).
 *
 * NOTE(review): ctx is parsed with "I" (unsigned int) but formatted with
 * "%d", and sprintf() is unbounded; the declaration of fname is not visible
 * here, so its size should be checked and snprintf()/"%u" considered.
 */
99 vserver_isrunning(PyObject *self, PyObject *args)
106 if (!PyArg_ParseTuple(args, "I", &ctx))
109 sprintf(fname,"/proc/virtual/%d", ctx);
111 if(stat(&fname[0],&statbuf)==0)
112 ret = PyBool_FromLong(1);
114 ret = PyBool_FromLong(0);
/*
 * Shared helper: query one resource limit of context xid via
 * vc_get_rlimit().  On -1 the result is an OSError built from errno;
 * otherwise a tuple of three long-long values in the order
 * (hard, soft, min) is built.
 */
120 __vserver_get_rlimit(xid_t xid, int resource) {
121 struct vc_rlimit limits;
125 if (vc_get_rlimit(xid, resource, &limits)==-1)
126 ret = PyErr_SetFromErrno(PyExc_OSError);
128 ret = Py_BuildValue("LLL",limits.hard, limits.soft, limits.min);
/*
 * getrlimit(xid, resource)
 *
 * Parses an unsigned context id and a signed resource number ("Ii") and
 * delegates to __vserver_get_rlimit() for the actual query and result.
 */
134 vserver_get_rlimit(PyObject *self, PyObject *args) {
139 if (!PyArg_ParseTuple(args, "Ii", &xid, &resource))
142 ret = __vserver_get_rlimit(xid, resource);
/*
 * setrlimit(xid, resource, hard, soft, min)
 *
 * Parses the context id, resource number and three long-long limits
 * ("IiLLL"), fetches the context's limit mask with vc_get_rlimit_mask(),
 * and applies the limits with vc_set_rlimit() when the resource's bit is
 * present in any of the min/soft/hard masks.  Failures of either vc_* call
 * become OSError from errno.  The result for the non-error path comes from
 * __vserver_get_rlimit(); the branching that connects these statements is
 * not fully visible in this excerpt.
 *
 * NOTE(review): resource is not range-checked before "1<<resource" on the
 * visible lines; a negative or >= 31 value would make the shift undefined.
 */
148 vserver_set_rlimit(PyObject *self, PyObject *args) {
149 struct vc_rlimit limits;
150 struct vc_rlimit_mask mask;
/* Start from "keep current value" for all three fields. */
156 limits.min = VC_LIM_KEEP;
157 limits.soft = VC_LIM_KEEP;
158 limits.hard = VC_LIM_KEEP;
160 if (!PyArg_ParseTuple(args, "IiLLL", &xid, &resource, &limits.hard, &limits.soft, &limits.min))
165 if (vc_get_rlimit_mask(xid, &mask)==-1) {
166 ret = PyErr_SetFromErrno(PyExc_OSError);
168 bitmask = (1<<resource);
169 if ((mask.min|mask.soft|mask.hard) & bitmask)
170 if (vc_set_rlimit(xid, resource, &limits)==-1)
171 ret = PyErr_SetFromErrno(PyExc_OSError);
173 ret = __vserver_get_rlimit(xid, resource);
/*
 * setsched(ctx, cpu_min[, cpu_share])
 *
 * Parses two unsigned values and an optional third ("II|I") and passes
 * them to pl_setsched().  A failing call is raised as OSError from errno;
 * the second half of the condition is not visible in this excerpt, but the
 * comment below indicates ESRCH is involved in that test.
 */
183 vserver_setsched(PyObject *self, PyObject *args)
189 if (!PyArg_ParseTuple(args, "II|I", &ctx, &cpu_min, &cpu_share))
192 /* ESRCH indicates that there are no processes in the context */
193 if (pl_setsched(ctx, cpu_min, cpu_share) &&
195 return PyErr_SetFromErrno(PyExc_OSError);
/*
 * getdlimit(path, xid)
 *
 * Parses a path string and an int context id ("si"), zeroes the result
 * structure and queries vc_get_dlimit(path, xid, 0, ...).  The result is
 * either a 5-tuple of ints built from the structure (the individual fields
 * are on lines not shown here) or an OSError built from errno.
 */
201 vserver_get_dlimit(PyObject *self, PyObject *args)
206 struct vc_ctx_dlimit data;
209 if (!PyArg_ParseTuple(args, "si", &path,&xid))
212 memset(&data, 0, sizeof(data));
213 r = vc_get_dlimit(path, xid, 0, &data);
215 res = Py_BuildValue("(i,i,i,i,i)",
222 res = PyErr_SetFromErrno(PyExc_OSError);
/*
 * setdlimit(path, xid, ...five int limits...)
 *
 * Parses a path and six ints ("siiiiii") into a zeroed vc_ctx_dlimit, then
 * makes sure a disk-limit entry exists and sets it.  Any failure other than
 * "entry already exists" is raised as OSError from errno.
 */
230 vserver_set_dlimit(PyObject *self, PyObject *args)
234 struct vc_ctx_dlimit data;
236 memset(&data,0,sizeof(data));
237 if (!PyArg_ParseTuple(args, "siiiiii", &path,
/* EEXIST from vc_add_dlimit() is fine: the entry is already there and only
 * needs its values updated by vc_set_dlimit(). */
246 if ((vc_add_dlimit(path, xid, 0) && errno != EEXIST) ||
247 vc_set_dlimit(path, xid, 0, &data))
248 return PyErr_SetFromErrno(PyExc_OSError);
/*
 * unsetdlimit(path, xid)
 *
 * Parses a path and an int context id ("si") and removes the disk limit
 * with vc_rem_dlimit().  A failure with errno == ESRCH is ignored; any
 * other failure is raised as OSError.
 */
254 vserver_unset_dlimit(PyObject *self, PyObject *args)
259 if (!PyArg_ParseTuple(args, "si", &path, &xid))
262 if (vc_rem_dlimit(path, xid, 0) && errno != ESRCH)
263 return PyErr_SetFromErrno(PyExc_OSError);
/*
 * killall(ctx, sig)
 *
 * Parses an unsigned context id and a signal number ("Ii"), delivers the
 * signal with vc_ctx_kill(ctx, 0, sig), then updates the context flags and
 * network flags using masks VC_VXF_PERSISTENT and VC_NXF_PERSISTENT.  The
 * flag values written under those masks are on lines not shown here.
 * Each step ignores ESRCH and raises OSError for any other failure.
 */
269 vserver_killall(PyObject *self, PyObject *args)
273 struct vc_ctx_flags cflags = {
275 .mask = VC_VXF_PERSISTENT
277 struct vc_net_flags nflags = {
279 .mask = VC_NXF_PERSISTENT
282 if (!PyArg_ParseTuple(args, "Ii", &ctx, &sig))
285 if (vc_ctx_kill(ctx, 0, sig) && errno != ESRCH)
286 return PyErr_SetFromErrno(PyExc_OSError);
288 if (vc_set_cflags(ctx, &cflags) && errno != ESRCH)
289 return PyErr_SetFromErrno(PyExc_OSError);
291 if (vc_set_nflags(ctx, &nflags) && errno != ESRCH)
292 return PyErr_SetFromErrno(PyExc_OSError);
/*
 * setbcaps(ctx, bcaps)
 *
 * Parses an unsigned context id and a 64-bit capability value ("IK") and
 * applies it with vc_set_ccaps().  Only bits inside PL_INSECURE_BCAPS are
 * selected by the mask; the ccaps value and mask are both zero.  A -1
 * result with errno other than ESRCH is raised as OSError.
 */
298 vserver_set_bcaps(PyObject *self, PyObject *args)
301 struct vc_ctx_caps caps;
303 if (!PyArg_ParseTuple(args, "IK", &ctx, &caps.bcaps))
306 caps.bmask = PL_INSECURE_BCAPS;
307 caps.cmask = caps.ccaps = 0;
308 if (vc_set_ccaps(ctx, &caps) == -1 && errno != ESRCH)
309 return PyErr_SetFromErrno(PyExc_OSError);
/*
 * text2bcaps(list) -> int
 *
 * Parses a string with explicit length ("s#"), feeds it to vc_list2bcap()
 * starting from bcaps == 0, and returns the resulting 64-bit bitmap.
 *
 * NOTE(review): on the visible lines neither the return value of
 * vc_list2bcap() nor the err structure is inspected, so a malformed list
 * appears to be accepted silently -- confirm against the full file.
 */
315 vserver_text2bcaps(PyObject *self, PyObject *args)
317 struct vc_ctx_caps caps = { .bcaps = 0 };
320 struct vc_err_listparser err;
322 if (!PyArg_ParseTuple(args, "s#", &list, &len))
325 vc_list2bcap(list, len, &err, &caps);
327 return Py_BuildValue("K", caps.bcaps);
/*
 * getbcaps(ctx) -> int
 *
 * Parses an unsigned context id ("I") and reads its capabilities with
 * vc_get_ccaps().  The -1 branch raises OSError from errno (part of that
 * branch is on lines not shown here).  On success only the bits that fall
 * inside PL_INSECURE_BCAPS are reported.
 */
331 vserver_get_bcaps(PyObject *self, PyObject *args)
334 struct vc_ctx_caps caps;
336 if (!PyArg_ParseTuple(args, "I", &ctx))
339 if (vc_get_ccaps(ctx, &caps) == -1) {
341 return PyErr_SetFromErrno(PyExc_OSError);
346 return Py_BuildValue("K", caps.bcaps & PL_INSECURE_BCAPS);
/*
 * bcaps2text(bcaps) -> str
 *
 * Parses a 64-bit bitmap ("K") and repeatedly asks vc_lobcap2text() for a
 * capability name until it returns NULL, appending each name to a Python
 * string as "CAP_<name>".  A leading comma is used for every entry after
 * the first, i.e. whenever the accumulated string is already non-empty.
 */
350 vserver_bcaps2text(PyObject *self, PyObject *args)
352 struct vc_ctx_caps caps = { .bcaps = 0 };
356 if (!PyArg_ParseTuple(args, "K", &caps.bcaps))
359 list = PyString_FromString("");
361 while ((cap = vc_lobcap2text(&caps.bcaps)) != NULL) {
364 PyString_ConcatAndDel(&list, PyString_FromFormat(
365 (PyString_Size(list) > 0 ? ",CAP_%s" : "CAP_%s" ),
/*
 * Parse a textual IP address into addr.  IPv6 is tried first, then IPv4;
 * whichever inet_pton() call succeeds stores the binary address and sets
 * vna_type to the matching VC_NXA_TYPE_* value.  The return statements are
 * not visible in this excerpt; callers in this file compare the result
 * against -1.
 */
373 convert_address(const char *str, struct vc_net_addr *addr)
376 if (inet_pton(AF_INET6, str, addr->vna_v6_ip.s6_addr) > 0) {
377 addr->vna_type = VC_NXA_TYPE_IPV6;
380 else if (inet_pton(AF_INET, str, &addr->vna_v4_ip.s_addr) > 0) {
381 addr->vna_type = VC_NXA_TYPE_IPV4;
/*
 * Count the leading one-bits of a netmask held in network byte order.
 *
 * data  - pointer to the mask bytes (4 for IPv4, 16 for IPv6)
 * limit - number of bits available in data (32 or 128)
 *
 * Returns the prefix length, in the range 0..limit.
 *
 * Within each byte a netmask is filled from the most significant bit
 * downwards (a /25 ends in 0x80), so bits are tested MSB-first.  Testing
 * LSB-first would stop at the last byte boundary and under-report every
 * prefix that is not a multiple of 8.
 */
static int
mask_to_prefix(void *data, int limit)
{
  uint8_t *mask = data;
  int prefix;

  for (prefix = 0;
       prefix < limit && (mask[prefix >> 3] & (0x80 >> (prefix & 0x07)));
       prefix++)
    ;

  return prefix;
}
/*
 * Fill in the netmask and prefix length of addr, whose vna_type and IP
 * must already be set (see convert_address()).
 *
 * A default is stored first (IPv4: 255.255.255.0 / 24, IPv6: the first 64
 * bits / 64).  The interface list from getifaddrs() is then searched for an
 * interface carrying exactly this address; when one is found its netmask
 * replaces the default and vna_prefix is recomputed with mask_to_prefix().
 *
 * The return statements are on lines not shown in this excerpt; callers in
 * this file switch on the result and treat -1 as "errno is set".
 */
398 get_mask(struct vc_net_addr *addr)
400 struct ifaddrs *head, *ifa;
402 int family, offset, len;
405 switch (addr->vna_type) {
406 case VC_NXA_TYPE_IPV4:
408 offset = offsetof(struct sockaddr_in, sin_addr.s_addr);
409 ip = &addr->vna_v4_ip.s_addr;
411 addr->vna_v4_mask.s_addr = htonl(0xffffff00);
412 addr->vna_prefix = 24;
414 case VC_NXA_TYPE_IPV6:
416 offset = offsetof(struct sockaddr_in6, sin6_addr.s6_addr);
417 ip = addr->vna_v6_ip.s6_addr;
/* s6_addr32 has exactly four 32-bit words (indices 0..3): words 0 and 1
 * are the 64 network bits, words 2 and 3 the host bits. */
419 addr->vna_v6_mask.s6_addr32[0] = addr->vna_v6_mask.s6_addr32[1] = 0xffffffff;
420 addr->vna_v6_mask.s6_addr32[2] = addr->vna_v6_mask.s6_addr32[3] = 0x00000000;
421 addr->vna_prefix = 64;
428 if (getifaddrs(&head) == -1)
430 for (ifa = head; ifa; ifa = ifa->ifa_next) {
/* getifaddrs() may return entries without an address or netmask; skip
 * them instead of dereferencing a null pointer. */
431 if (ifa->ifa_addr && ifa->ifa_netmask && ifa->ifa_addr->sa_family == family &&
432 memcmp((char *) ifa->ifa_addr + offset, ip, len) == 0) {
433 switch (addr->vna_type) {
434 case VC_NXA_TYPE_IPV4:
/* offset is a byte offset, so step through the sockaddr as char *;
 * arithmetic on the struct sockaddr pointer itself would advance by
 * offset * sizeof(struct sockaddr) and read past the object. */
435 memcpy(&addr->vna_v4_mask.s_addr, (char *) ifa->ifa_netmask + offset, len);
436 addr->vna_prefix = mask_to_prefix(&addr->vna_v4_mask.s_addr, 32);
438 case VC_NXA_TYPE_IPV6:
439 memcpy(addr->vna_v6_mask.s6_addr, (char *) ifa->ifa_netmask + offset, len);
440 addr->vna_prefix = mask_to_prefix(addr->vna_v6_mask.s6_addr, 128);
451 /* XXX These two functions are really similar */
/*
 * netadd(nid, ip)
 *
 * Parses an unsigned network-context id and an address string ("Is").
 * The literal "0.0.0.0" is handled specially as an IPv4 mask entry with
 * both address and mask zero.  Any other string must parse via
 * convert_address() (else ValueError) and gets its mask from get_mask()
 * before being tagged VC_NXA_TYPE_ADDR.  The address is then registered
 * with vc_net_add(); a -1 result with errno other than ESRCH is raised as
 * OSError.
 */
453 vserver_net_add(PyObject *self, PyObject *args)
455 struct vc_net_addr addr = { .vna_type = 0 };
459 if (!PyArg_ParseTuple(args, "Is", &nid, &ip))
462 /* Optimize standard case, which also needs to be handled differently */
463 if (strcmp(ip, "0.0.0.0") == 0) {
464 addr.vna_type = VC_NXA_TYPE_MASK | VC_NXA_TYPE_IPV4;
468 addr.vna_v4_mask.s_addr = 0;
469 addr.vna_v4_ip.s_addr = 0;
472 if (convert_address(ip, &addr) == -1)
473 return PyErr_Format(PyExc_ValueError, "%s is not a valid IP address", ip);
475 switch (get_mask(&addr)) {
477 return PyErr_SetFromErrno(PyExc_OSError);
479 /* XXX error here? */
482 addr.vna_type |= VC_NXA_TYPE_ADDR;
485 if (vc_net_add(nid, &addr) == -1 && errno != ESRCH)
486 return PyErr_SetFromErrno(PyExc_OSError);
/*
 * netremove(nid, ip)
 *
 * Parses an unsigned network-context id and a string ("Is").  The keywords
 * "all", "all4" and "all6" select every address, every IPv4 address or
 * every IPv6 address respectively.  Any other string must parse via
 * convert_address() (else ValueError) and gets its mask from get_mask()
 * before being tagged VC_NXA_TYPE_ADDR.  The selection is then removed
 * with vc_net_remove(); a -1 result with errno other than ESRCH is raised
 * as OSError.
 *
 * NOTE(review): addr has no initializer here (vserver_net_add zero-starts
 * its copy); on the keyword paths only vna_type is assigned on the visible
 * lines -- confirm whether the remaining fields are read downstream.
 */
492 vserver_net_remove(PyObject *self, PyObject *args)
494 struct vc_net_addr addr;
498 if (!PyArg_ParseTuple(args, "Is", &nid, &ip))
501 if (strcmp(ip, "all") == 0)
502 addr.vna_type = VC_NXA_TYPE_ANY;
503 else if (strcmp(ip, "all4") == 0)
504 addr.vna_type = VC_NXA_TYPE_IPV4 | VC_NXA_TYPE_ANY;
505 else if (strcmp(ip, "all6") == 0)
506 addr.vna_type = VC_NXA_TYPE_IPV6 | VC_NXA_TYPE_ANY;
508 if (convert_address(ip, &addr) == -1)
509 return PyErr_Format(PyExc_ValueError, "%s is not a valid IP address", ip);
511 switch (get_mask(&addr)) {
513 return PyErr_SetFromErrno(PyExc_OSError);
516 addr.vna_type |= VC_NXA_TYPE_ADDR;
519 if (vc_net_remove(nid, &addr) == -1 && errno != ESRCH)
520 return PyErr_SetFromErrno(PyExc_OSError);
/* Change directory to the open descriptor fd and then chroot to ".",
 * i.e. make that directory the new root.  The enclosing function header is
 * not visible in this excerpt; presumably this is the body of the local
 * fchroot() helper called elsewhere in this file -- TODO confirm. */
535 if (fchdir(fd) == -1 || chroot(".") == -1)
/*
 * Undo whatever part of secure_chdir() has taken effect.  Each descriptor
 * in dirs is handled only if it is still open (!= -1):
 *   host_fd   - chroot back to it, then close it
 *   guest_fd  - close it
 *   target_fd - close it
 *   cwd_fd    - fchdir back to it, then close it
 * What happens when one of these calls fails is on lines not shown here.
 */
541 restore_dirs(struct secure_dirs *dirs)
543 if (dirs->host_fd != -1) {
544 if (fchroot(dirs->host_fd) == -1)
546 if (close(dirs->host_fd) == -1)
549 if (dirs->guest_fd != -1) {
550 if (close(dirs->guest_fd) == -1)
553 if (dirs->target_fd != -1) {
554 if (close(dirs->target_fd) == -1)
557 if (dirs->cwd_fd != -1) {
558 if (fchdir(dirs->cwd_fd) == -1)
560 if (close(dirs->cwd_fd) == -1)
/*
 * Change the working directory to `target` as seen from inside `guest`.
 *
 * Sequence on the visible lines:
 *   1. mark all four descriptors in dirs as unopened (-1);
 *   2. open the current root ("/") and the current directory (".");
 *   3. open the guest directory and chroot into it;
 *   4. open `target` while chrooted, so the path is resolved relative to
 *      the guest root rather than the host root;
 *   5. chroot back to the saved host root and close host_fd and guest_fd;
 *   6. fchdir into the opened target and close target_fd.
 * The error branches and return values are on lines not shown here.
 */
567 secure_chdir(struct secure_dirs *dirs, const char *guest, const char *target)
569 dirs->host_fd = dirs->cwd_fd = dirs->guest_fd = dirs->target_fd = -1;
571 dirs->host_fd = open("/", O_RDONLY|O_DIRECTORY);
572 if (dirs->host_fd == -1)
575 dirs->cwd_fd = open(".", O_RDONLY|O_DIRECTORY);
576 if (dirs->cwd_fd == -1)
579 dirs->guest_fd = open(guest, O_RDONLY|O_DIRECTORY);
580 if (dirs->guest_fd == -1)
582 if (fchroot(dirs->guest_fd) == -1)
585 dirs->target_fd = open(target, O_RDONLY|O_DIRECTORY);
586 if (dirs->target_fd == -1)
589 if (fchroot(dirs->host_fd) == -1 || close(dirs->host_fd) == -1)
592 if (close(dirs->guest_fd) == -1)
596 if (fchdir(dirs->target_fd) == -1 || close(dirs->target_fd) == -1)
/*
 * mount(source, guest, target, type[, flags[, data]])
 *
 * Parses four strings plus an optional unsigned long and optional string
 * ("ssss|ks"; the last two destinations are on a line not shown here,
 * presumably flags and data).  secure_chdir() moves the process into
 * `target` inside `guest`, and the filesystem is then mounted on ".".
 * A mount() failure with errno == EBUSY is tolerated; other failures end
 * in OSError built from errno.
 */
603 vserver_mount(PyObject *self, PyObject *args)
605 const char *guest, *target, *source, *type, *data = NULL;
606 unsigned long flags = 0;
607 struct secure_dirs dirs;
609 if (!PyArg_ParseTuple(args, "ssss|ks", &source, &guest, &target, &type,
613 if (secure_chdir(&dirs, guest, target) == -1)
615 if (mount(source, ".", type, flags, data) == -1 && errno != EBUSY)
623 return PyErr_SetFromErrno(PyExc_OSError);
627 vserver_umount(PyObject *self, PyObject *args)
629 const char *guest, *target;
634 if (!PyArg_ParseTuple(args, "ss|i", &guest, &target, &flags))
637 path = calloc(strlen(guest) + strlen(target) + 2, sizeof(char));
638 sprintf(path, "%s/%s", guest, target);
639 if (umount2(path, flags) == -1)
640 ret = PyErr_SetFromErrno(PyExc_OSError);
/*
 * setrunlevel(file, runlevel)
 *
 * Parses a file name and an int ("si") and prepares a zeroed utmp record
 * of type RUN_LVL.  ut_pid packs the character '#' into bits 8 and up and
 * the runlevel as an ASCII digit (runlevel + '0') into the low byte.
 * Writing the record to `file` happens on lines not shown here.
 */
649 vserver_set_runlevel(PyObject *self, PyObject *args)
655 if (!PyArg_ParseTuple(args, "si", &file, &runlevel))
660 memset(&ut, 0, sizeof(ut));
661 ut.ut_type = RUN_LVL;
662 ut.ut_pid = ('#' << 8) + runlevel + '0';
/*
 * setname(ctx, slice_id)
 *
 * Parses two unsigned ints ("II") and stores the raw bytes of slice_id as
 * the context's vcVHI_CONTEXT name via vc_set_vhi_name().  A failure with
 * errno == ESRCH is ignored; any other failure is raised as OSError.
 */
670 vserver_set_name(PyObject *self, PyObject *args)
675 if (!PyArg_ParseTuple(args, "II", &ctx, &slice_id))
678 if (vc_set_vhi_name(ctx, vcVHI_CONTEXT, (char *)&slice_id, sizeof(slice_id)) != 0 && errno != ESRCH) {
679 return PyErr_SetFromErrno(PyExc_OSError);
/*
 * getname(ctx) -> int
 *
 * Parses an unsigned context id ("I") and reads the vcVHI_CONTEXT name
 * back into slice_id as raw bytes via vc_get_vhi_name(), mirroring how
 * vserver_set_name() stores it.  A non-zero result yields OSError from
 * errno; otherwise slice_id is returned as a Python int ("i").
 */
686 vserver_get_name(PyObject *self, PyObject *args)
691 if (!PyArg_ParseTuple(args, "I", &ctx))
694 if (vc_get_vhi_name(ctx, vcVHI_CONTEXT, (char *)&slice_id, sizeof(slice_id)) != 0) {
695 ret = PyErr_SetFromErrno(PyExc_OSError);
697 ret = Py_BuildValue("i", slice_id);
/*
 * Method table handed to Py_InitModule(): maps each Python-visible name to
 * its C implementation.  Every entry uses METH_VARARGS (positional
 * arguments only), and the all-NULL entry terminates the table.
 */
702 static PyMethodDef methods[] = {
703 { "chcontext", vserver_chcontext, METH_VARARGS,
704 "chcontext to vserver with provided flags" },
705 { "setup_done", vserver_setup_done, METH_VARARGS,
706 "Release vserver setup lock" },
707 { "setsched", vserver_setsched, METH_VARARGS,
708 "Change vserver scheduling attributes for given vserver context" },
709 { "setdlimit", vserver_set_dlimit, METH_VARARGS,
710 "Set disk limits for given vserver context" },
711 { "unsetdlimit", vserver_unset_dlimit, METH_VARARGS,
712 "Remove disk limits for given vserver context" },
713 { "getdlimit", vserver_get_dlimit, METH_VARARGS,
714 "Get disk limits for given vserver context" },
715 { "setrlimit", vserver_set_rlimit, METH_VARARGS,
716 "Set resource limits for given resource of a vserver context" },
717 { "getrlimit", vserver_get_rlimit, METH_VARARGS,
718 "Get resource limits for given resource of a vserver context" },
719 { "killall", vserver_killall, METH_VARARGS,
720 "Send signal to all processes in vserver context" },
721 { "isrunning", vserver_isrunning, METH_VARARGS,
722 "Check if vserver is running"},
723 { "setbcaps", vserver_set_bcaps, METH_VARARGS,
724 "Set POSIX capabilities of a vserver context" },
725 { "getbcaps", vserver_get_bcaps, METH_VARARGS,
726 "Get POSIX capabilities of a vserver context" },
727 { "text2bcaps", vserver_text2bcaps, METH_VARARGS,
728 "Translate a string of capabilities to a bitmap" },
729 { "bcaps2text", vserver_bcaps2text, METH_VARARGS,
730 "Translate a capability-bitmap into a string" },
731 { "netadd", vserver_net_add, METH_VARARGS,
732 "Assign an IP address to a context" },
733 { "netremove", vserver_net_remove, METH_VARARGS,
734 "Remove IP address(es) from a context" },
735 { "mount", vserver_mount, METH_VARARGS,
736 "Perform the mount() system call" },
737 { "umount", vserver_umount, METH_VARARGS,
738 "Perform the umount2() system call" },
739 { "setrunlevel", vserver_set_runlevel, METH_VARARGS,
740 "Set the runlevel in utmp" },
741 { "setname", vserver_set_name, METH_VARARGS,
742 "Set the vcVHI_CONTEXT for a xid." },
743 { "getname", vserver_get_name, METH_VARARGS,
744 "Get the vcVHI_CONTEXT for a xid." },
745 { NULL, NULL, 0, NULL }
/*
 * Module initialiser for "vserverimpl".  Registers the method table with
 * Py_InitModule() and publishes the capability, directory and limit
 * constants below as module attributes.
 *
 * NOTE(review): CAP_SAFE is published through PyModule_AddIntConstant(),
 * whose value parameter is a C long, while the capability sets elsewhere
 * in this file are handled as 64-bit values ("K"); on platforms with a
 * 32-bit long the upper bits would be lost -- confirm whether that matters.
 * The limit constants are likewise narrowed with explicit (int) casts.
 */
749 initvserverimpl(void)
753 mod = Py_InitModule("vserverimpl", methods);
755 /* export the set of 'safe' capabilities */
756 PyModule_AddIntConstant(mod, "CAP_SAFE", ~vc_get_insecurebcaps());
758 /* export the default vserver directory */
759 PyModule_AddStringConstant(mod, "VSERVER_BASEDIR", DEFAULT_VSERVERDIR);
761 /* export limit-related constants */
762 PyModule_AddIntConstant(mod, "DLIMIT_KEEP", (int)VC_CDLIM_KEEP);
763 PyModule_AddIntConstant(mod, "DLIMIT_INF", (int)VC_CDLIM_INFINITY);
764 PyModule_AddIntConstant(mod, "VC_LIM_KEEP", (int)VC_LIM_KEEP);
766 PyModule_AddIntConstant(mod, "RLIMIT_CPU", (int)RLIMIT_CPU);
767 PyModule_AddIntConstant(mod, "RLIMIT_RSS", (int)RLIMIT_RSS);
768 PyModule_AddIntConstant(mod, "RLIMIT_NPROC", (int)RLIMIT_NPROC);
769 PyModule_AddIntConstant(mod, "RLIMIT_NOFILE", (int)RLIMIT_NOFILE);
770 PyModule_AddIntConstant(mod, "RLIMIT_MEMLOCK", (int)RLIMIT_MEMLOCK);
771 PyModule_AddIntConstant(mod, "RLIMIT_AS", (int)RLIMIT_AS);
772 PyModule_AddIntConstant(mod, "RLIMIT_LOCKS", (int)RLIMIT_LOCKS);
774 PyModule_AddIntConstant(mod, "RLIMIT_SIGPENDING", (int)RLIMIT_SIGPENDING);
775 PyModule_AddIntConstant(mod, "RLIMIT_MSGQUEUE", (int)RLIMIT_MSGQUEUE);
777 PyModule_AddIntConstant(mod, "VLIMIT_NSOCK", (int)VC_VLIMIT_NSOCK);
778 PyModule_AddIntConstant(mod, "VLIMIT_OPENFD", (int)VC_VLIMIT_OPENFD);
779 PyModule_AddIntConstant(mod, "VLIMIT_ANON", (int)VC_VLIMIT_ANON);
780 PyModule_AddIntConstant(mod, "VLIMIT_SHMEM", (int)VC_VLIMIT_SHMEM);