X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=python%2Fvserver.py;h=9c967eeeec1aac949f7ecb6bc7c8a46360ffcf9d;hb=036c0f5f754fde79c039d62e41ea8b10df473020;hp=9d194b55bb212fbcbd370252b3d0a9213f20ba8e;hpb=60b1bd932d29e2f6530b4945c9994de45cb55382;p=util-vserver.git diff --git a/python/vserver.py b/python/vserver.py index 9d194b5..9c967ee 100644 --- a/python/vserver.py +++ b/python/vserver.py @@ -4,29 +4,35 @@ import errno import fcntl import os import re +import pwd +import signal import sys import time import traceback -import linuxcaps -import passfdimpl -import vserverimpl - -from util_vserver_vars import * - -CAP_SAFE = (linuxcaps.CAP_CHOWN | - linuxcaps.CAP_DAC_OVERRIDE | - linuxcaps.CAP_DAC_READ_SEARCH | - linuxcaps.CAP_FOWNER | - linuxcaps.CAP_FSETID | - linuxcaps.CAP_KILL | - linuxcaps.CAP_SETGID | - linuxcaps.CAP_SETUID | - linuxcaps.CAP_SETPCAP | - linuxcaps.CAP_SYS_TTY_CONFIG | - linuxcaps.CAP_LEASE | - linuxcaps.CAP_SYS_CHROOT | - linuxcaps.CAP_SYS_PTRACE) +import mountimpl +import runcmd +import utmp +import vserverimpl, vduimpl +import cpulimit, bwlimit + +from vserverimpl import VS_SCHED_CPU_GUARANTEED as SCHED_CPU_GUARANTEED +from vserverimpl import DLIMIT_INF +from vserverimpl import VC_LIM_KEEP + +from vserverimpl import RLIMIT_CPU +from vserverimpl import RLIMIT_RSS +from vserverimpl import RLIMIT_NPROC +from vserverimpl import RLIMIT_NOFILE +from vserverimpl import RLIMIT_MEMLOCK +from vserverimpl import RLIMIT_AS +from vserverimpl import RLIMIT_LOCKS +from vserverimpl import RLIMIT_SIGPENDING +from vserverimpl import RLIMIT_MSGQUEUE +from vserverimpl import VLIMIT_NSOCK +from vserverimpl import VLIMIT_OPENFD +from vserverimpl import VLIMIT_ANON +from vserverimpl import VLIMIT_SHMEM # # these are the flags taken from the kernel linux/vserver/legacy.h @@ -41,25 +47,125 @@ FLAGS_ULIMIT = 64 FLAGS_NAMESPACE = 128 - +class NoSuchVServer(Exception): pass + + class VServer: - INITSCRIPTS = [('/etc/rc.vinit', 'start'), ('/etc/rc.d/rc', '3')] + INITSCRIPTS = [('/etc/rc.vinit', 'start'), + ('/etc/rc.d/rc', '%(runlevel)d')] - def __init__(self, name): + def __init__(self, name, vm_id = None, vm_running = False): self.name = name - self.config = self.__read_config_file("/etc/vservers.conf") - self.config.update(self.__read_config_file("/etc/vservers/%s.conf" % - self.name)) - self.flags = 0 - flags = self.config["S_FLAGS"].split(" ") - if "lock" in flags: - self.flags |= FLAGS_LOCK - if "nproc" in flags: - self.flags |= FLAGS_NPROC - self.remove_caps = ~CAP_SAFE - self.ctx = int(self.config["S_CONTEXT"]) + self.limits_changed = False + self.config_file = "/etc/vservers/%s.conf" % name + self.dir = "%s/%s" % (vserverimpl.VSERVER_BASEDIR, name) + if not (os.path.isdir(self.dir) and + os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)): + raise NoSuchVServer, "no such vserver: " + name + self.config = {} + for config_file in ["/etc/vservers.conf", self.config_file]: + try: + self.config.update(self.__read_config_file(config_file)) + except IOError, ex: + if ex.errno != errno.ENOENT: + raise + self.remove_caps = ~vserverimpl.CAP_SAFE; + if vm_id == None: + vm_id = int(self.config['S_CONTEXT']) + self.ctx = vm_id + self.vm_running = vm_running + + # For every resource key listed in the limit table, add in a + # new method with which one can get/set the resource's hard, + # soft, minimum limits. + limits = {"CPU": RLIMIT_CPU, + "RSS": RLIMIT_RSS, + "NPROC": RLIMIT_NPROC, + "NOFILE": RLIMIT_NOFILE, + "MEMLOCK": RLIMIT_MEMLOCK, + "AS": RLIMIT_AS, + "LOCKS": RLIMIT_LOCKS, + "SIGPENDING": RLIMIT_SIGPENDING, + "MSGQUEUE": RLIMIT_MSGQUEUE, + "NSOCK": VLIMIT_NSOCK, + "OPENFD": VLIMIT_OPENFD, + "ANON": VLIMIT_ANON, + "SHMEM": VLIMIT_SHMEM} + for meth in limits.keys(): + resource_type = limits[meth] + func = lambda \ + hard=VC_LIM_KEEP,\ + soft=VC_LIM_KEEP,\ + minimum=VC_LIM_KEEP:\ + self.__set_vserver_limit(resource_type,\ + hard, \ + soft,\ + minimum) + self.__dict__["set_%s_limit"%meth] = func + + func = lambda \ + hard=VC_LIM_KEEP,\ + soft=VC_LIM_KEEP,\ + minimum=VC_LIM_KEEP:\ + self.__set_vserver_config(meth, resource_type, \ + hard, \ + soft,\ + minimum) + self.__dict__["set_%s_config"%meth] = func + + func = lambda : self.__get_vserver_limit(resource_type) + self.__dict__["get_%s_limit"%meth] = func + + func = lambda : self.__get_vserver_config(meth,resource_type) + self.__dict__["get_%s_config"%meth] = func + + def have_limits_changed(self): + return self.limits_changed + + def __set_vserver_limit(self,resource_type,hard,soft,minimum): + """Generic set resource limit function for vserver""" + if self.is_running(): + changed = False + old_hard, old_soft, old_minimum = self.__get_vserver_limit(resource_type) + if old_hard != VC_LIM_KEEP and old_hard <> hard: changed = True + if old_soft != VC_LIM_KEEP and old_soft <> soft: changed = True + if old_minimu != VC_LIM_KEEP and old_minimum <> minimum: changed = True + self.limits_changed = self.limits_changed or changed + ret = vserverimpl.setrlimit(self.ctx,resource_type,hard,soft,minimum) + + def __set_vserver_config(self,meth,resource_type,hard,soft,minimum): + """Generic set resource limit function for vserver""" + resources = {} + if hard <> VC_LIM_KEEP: + resources["VS_%s_HARD"%meth] = hard + if soft <> VC_LIM_KEEP: + resources["VS_%s_SOFT"%meth] = soft + if minimum <> VC_LIM_KEEP: + resources["VS_%s_MINIMUM"%meth] = minimum + if len(resources)>0: + self.update_resources(resources) + self.__set_vserver_limit(resource_type,hard,soft,minimum) + + def __get_vserver_limit(self,resource_type): + """Generic get resource configuration function for vserver""" + if self.is_running(): + ret = vserverimpl.getrlimit(self.ctx,resource_type) + else: + ret = __get_vserver_config(meth,resource_type) + return ret + + def __get_vserver_config(self,meth,resource_type): + """Generic get resource configuration function for vserver""" + hard = int(self.config.get("VS_%s_HARD"%meth,VC_LIM_KEEP)) + soft = int(self.config.get("VS_%s_SOFT"%meth,VC_LIM_KEEP)) + minimum = int(self.config.get("VS_%s_MINIMUM"%meth,VC_LIM_KEEP)) + return (hard,soft,minimum) + + def set_WHITELISTED_config(self,whitelisted): + resources = {'VS_WHITELISTED': whitelisted} + self.update_resources(resources) config_var_re = re.compile(r"^ *([A-Z_]+)=(.*)\n?$", re.MULTILINE) @@ -74,75 +180,195 @@ class VServer: config[key] = val.strip('"') return config + def __update_config_file(self, filename, newvars): + + # read old file, apply changes + f = open(filename, "r") + data = f.read() + f.close() + todo = newvars.copy() + changed = False + offset = 0 + for m in self.config_var_re.finditer(data): + (key, val) = m.groups() + newval = todo.pop(key, None) + if newval != None: + data = data[:offset+m.start(2)] + str(newval) + data[offset+m.end(2):] + offset += len(str(newval)) - (m.end(2)-m.start(2)) + changed = True + for (newkey, newval) in todo.items(): + data += "%s=%s\n" % (newkey, newval) + changed = True + + if not changed: + return + + # write new file + newfile = filename + ".new" + f = open(newfile, "w") + f.write(data) + f.close() + + # replace old file with new + os.rename(newfile, filename) + def __do_chroot(self): - return os.chroot("%s/%s" % (VROOTDIR, self.name)) + os.chroot(self.dir) + os.chdir("/") - def open(self, filename, mode = "r", bufsize = -1): + def chroot_call(self, fn, *args): - (sendsock, recvsock) = passfdimpl.socketpair() - child_pid = os.fork() - if child_pid == 0: + cwd_fd = os.open(".", os.O_RDONLY) + try: + root_fd = os.open("/", os.O_RDONLY) try: - # child process self.__do_chroot() - f = open(filename, mode) - passfdimpl.sendmsg(f.fileno(), sendsock) - os._exit(0) - except EnvironmentError, ex: - (result, errmsg) = (ex.errno, ex.strerror) - except Exception, ex: - (result, errmsg) = (255, str(ex)) - os.write(sendsock, errmsg) - os._exit(result) - - # parent process + result = fn(*args) + finally: + os.fchdir(root_fd) + os.chroot(".") + os.fchdir(cwd_fd) + os.close(root_fd) + finally: + os.close(cwd_fd) + return result + + def set_disklimit(self, block_limit): + # block_limit is in kB + if block_limit == 0: + vserverimpl.unsetdlimit(self.dir, self.ctx) + return - # XXX - need this since a lambda can't raise an exception - def __throw(ex): - raise ex + if self.vm_running: + block_usage = vserverimpl.DLIMIT_KEEP + inode_usage = vserverimpl.DLIMIT_KEEP + else: + # init_disk_info() must have been called to get usage values + block_usage = self.disk_blocks + inode_usage = self.disk_inodes + + vserverimpl.setdlimit(self.dir, + self.ctx, + block_usage, + block_limit, + inode_usage, + vserverimpl.DLIMIT_INF, # inode limit + 2) # %age reserved for root + + resources = {'VS_DISK_MAX': block_limit} + self.update_resources(resources) + + def is_running(self): + return vserverimpl.isrunning(self.ctx) + + def get_disklimit(self): - os.close(sendsock) - throw = lambda : __throw(Exception(errmsg)) - while True: - try: - (pid, status) = os.waitpid(child_pid, 0) - if os.WIFEXITED(status): - result = os.WEXITSTATUS(status) - if result != 255: - errmsg = os.strerror(result) - throw = lambda : __throw(IOError(result, errmsg)) - else: - errmsg = "unexpected exception in child" - else: - result = -1 - errmsg = "child killed" - break - except OSError, ex: - if ex.errno != errno.EINTR: - os.close(recvsock) - raise ex - fcntl.fcntl(recvsock, fcntl.F_SETFL, os.O_NONBLOCK) try: - (fd, errmsg) = passfdimpl.recvmsg(recvsock) + (self.disk_blocks, block_limit, self.disk_inodes, inode_limit, + reserved) = vserverimpl.getdlimit(self.dir, self.ctx) except OSError, ex: - if ex.errno != errno.EAGAIN: - throw = lambda : __throw(ex) - fd = 0 - os.close(recvsock) - if not fd: - throw() + if ex.errno != errno.ESRCH: + raise + # get here if no vserver disk limit has been set for xid + block_limit = -1 + + return block_limit - return os.fdopen(fd, mode, bufsize) + def set_sched_config(self, cpu_share, sched_flags): - def __do_chcontext(self, state_file = None): + """ Write current CPU scheduler parameters to the vserver + configuration file. This method does not modify the kernel CPU + scheduling parameters for this context. """ - vserverimpl.chcontext(self.ctx, self.remove_caps) - if not state_file: + if cpu_share == int(self.config.get("CPULIMIT", -1)): return - print >>state_file, "S_CONTEXT=%d" % self.ctx - print >>state_file, "S_PROFILE=%s" % self.config.get("S_PROFILE", "") - state_file.close() + cpu_guaranteed = sched_flags & SCHED_CPU_GUARANTEED + cpu_config = { "CPULIMIT": cpu_share, "CPUGUARANTEED": cpu_guaranteed } + self.update_resources(cpu_config) + if self.vm_running: + self.set_sched(cpu_share, sched_flags) + + def set_sched(self, cpu_share, sched_flags = 0): + """ Update kernel CPU scheduling parameters for this context. """ + vserverimpl.setsched(self.ctx, cpu_share, sched_flags) + + def get_sched(self): + # have no way of querying scheduler right now on a per vserver basis + return (-1, False) + + def set_bwlimit(self, minrate = bwlimit.bwmin, maxrate = None, + exempt_min = None, exempt_max = None, + share = None, dev = "eth0"): + + if minrate is None: + bwlimit.off(self.ctx, dev) + else: + bwlimit.on(self.ctx, dev, share, + minrate, maxrate, exempt_min, exempt_max) + + def get_bwlimit(self, dev = "eth0"): + + result = bwlimit.get(self.ctx) + # result of bwlimit.get is (ctx, share, minrate, maxrate) + if result: + result = result[1:] + return result + + def open(self, filename, mode = "r", bufsize = -1): + + return self.chroot_call(open, filename, mode, bufsize) + + def __do_chcontext(self, state_file): + + if state_file: + print >>state_file, "S_CONTEXT=%u" % self.ctx + print >>state_file, "S_PROFILE=" + state_file.close() + + if vserverimpl.chcontext(self.ctx): + self.set_resources() + vserverimpl.setup_done(self.ctx) + + def __prep(self, runlevel, log): + + """ Perform all the crap that the vserver script does before + actually executing the startup scripts. """ + + # remove /var/run and /var/lock/subsys files + # but don't remove utmp from the top-level /var/run + RUNDIR = "/var/run" + LOCKDIR = "/var/lock/subsys" + filter_fn = lambda fs: filter(lambda f: f != 'utmp', fs) + garbage = reduce((lambda (out, ff), (dir, subdirs, files): + (out + map((dir + "/").__add__, ff(files)), + lambda fs: fs)), + list(os.walk(RUNDIR)), + ([], filter_fn))[0] + garbage += filter(os.path.isfile, map((LOCKDIR + "/").__add__, + os.listdir(LOCKDIR))) + for f in garbage: + os.unlink(f) + + # set the initial runlevel + f = open(RUNDIR + "/utmp", "w") + utmp.set_runlevel(f, runlevel) + f.close() + + # mount /proc and /dev/pts + self.__do_mount("none", "/proc", "proc") + # XXX - magic mount options + self.__do_mount("none", "/dev/pts", "devpts", 0, "gid=5,mode=0620") + + def __do_mount(self, *mount_args): + + try: + mountimpl.mount(*mount_args) + except OSError, ex: + if ex.errno == errno.EBUSY: + # assume already mounted + return + raise ex def enter(self): @@ -150,7 +376,9 @@ class VServer: self.__do_chroot() self.__do_chcontext(state_file) - def start(self): + def start(self, wait, runlevel = 3): + + self.vm_running = True child_pid = os.fork() if child_pid == 0: @@ -170,35 +398,42 @@ class VServer: log = open("/var/log/boot.log", "w", 0) os.dup2(1, 2) - # write same output that vserver script does print >>log, ("%s: starting the virtual server %s" % (time.asctime(time.gmtime()), self.name)) + # perform pre-init cleanup + self.__prep(runlevel, log) + # execute each init script in turn # XXX - we don't support all scripts that vserver script does cmd_pid = 0 + first_child = True for cmd in self.INITSCRIPTS + [None]: - # don't bother waiting for last command to terminate - if cmd == None: - os._exit(0) - - # wait for previous command to terminate - if cmd_pid: + # wait for previous command to terminate, unless it + # is the last one and the caller has specified to wait + if cmd_pid and (cmd != None or wait): try: os.waitpid(cmd_pid, 0) except: print >>log, "error waiting for %s:" % cmd_pid traceback.print_exc() + # end of list + if cmd == None: + os._exit(0) + # fork and exec next command cmd_pid = os.fork() if cmd_pid == 0: try: # enter vserver context self.__do_chcontext(state_file) - print >>log, "executing '%s'" % " ".join(cmd) - os.execl(cmd[0], *cmd) - finally: + arg_subst = { 'runlevel': runlevel } + cmd_args = [cmd[0]] + map(lambda x: x % arg_subst, + cmd[1:]) + print >>log, "executing '%s'" % " ".join(cmd_args) + os.execl(cmd[0], *cmd_args) + except: traceback.print_exc() os._exit(1) else: @@ -212,3 +447,40 @@ class VServer: # parent process return child_pid + + def set_resources(self): + + """ Called when vserver context is entered for first time, + should be overridden by subclass. """ + + pass + + def update_resources(self, resources): + + self.config.update(resources) + + # write new values to configuration file + self.__update_config_file(self.config_file, resources) + + def init_disk_info(self): + + (self.disk_inodes, self.disk_blocks, size) = vduimpl.vdu(self.dir) + + return size + + def stop(self, signal = signal.SIGKILL): + + vserverimpl.killall(self.ctx, signal) + self.vm_running = False + + + +def create(vm_name, static = False, ctor = VServer): + + options = [] + if static: + options += ['--static'] + runcmd.run('vuseradd', options + [vm_name]) + vm_id = pwd.getpwnam(vm_name)[2] + + return ctor(vm_name, vm_id)