# Copyright 2005 Princeton University #$Id: vserver.py,v 1.72 2007/08/02 16:01:59 dhozac Exp $ import errno import fcntl import os import re import pwd import signal import sys import time import traceback import subprocess import resource import vserverimpl import cpulimit, bwlimit from vserverimpl import DLIMIT_INF from vserverimpl import VC_LIM_KEEP from vserverimpl import VLIMIT_NSOCK from vserverimpl import VLIMIT_OPENFD from vserverimpl import VLIMIT_ANON from vserverimpl import VLIMIT_SHMEM # # these are the flags taken from the kernel linux/vserver/legacy.h # FLAGS_LOCK = 1 FLAGS_SCHED = 2 # XXX - defined in util-vserver/src/chcontext.c FLAGS_NPROC = 4 FLAGS_PRIVATE = 8 FLAGS_INIT = 16 FLAGS_HIDEINFO = 32 FLAGS_ULIMIT = 64 FLAGS_NAMESPACE = 128 RLIMITS = { "NSOCK": VLIMIT_NSOCK, "OPENFD": VLIMIT_OPENFD, "ANON": VLIMIT_ANON, "SHMEM": VLIMIT_SHMEM} # add in the platform supported rlimits for entry in resource.__dict__.keys(): if entry.find("RLIMIT_")==0: k = entry[len("RLIMIT_"):] if not RLIMITS.has_key(k): RLIMITS[k]=resource.__dict__[entry] else: print "WARNING: duplicate RLIMITS key %s" % k class NoSuchVServer(Exception): pass class VServerConfig: def __init__(self, name, directory): self.name = name self.dir = directory self.cache = None if not (os.path.isdir(self.dir) and os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)): raise NoSuchVServer, "%s does not exist" % self.dir def get(self, option, default = None): try: if self.cache: return self.cache[option] else: f = open(os.path.join(self.dir, option), "r") buf = f.read().rstrip() f.close() return buf except: if default is not None: return default else: raise KeyError, "Key %s is not set for %s" % (option, self.name) def update(self, option, value): if self.cache: return try: old_umask = os.umask(0022) filename = os.path.join(self.dir, option) try: os.makedirs(os.path.dirname(filename), 0755) except: pass f = open(filename, 'w') if isinstance(value, list): f.write("%s\n" % "\n".join(value)) else: f.write("%s\n" % value) f.close() os.umask(old_umask) except: raise def unset(self, option): if self.cache: return try: filename = os.path.join(self.dir, option) os.unlink(filename) try: os.removedirs(os.path.dirname(filename)) except: pass return True except: return False def cache_it(self): self.cache = {} def add_to_cache(cache, dirname, fnames): for file in fnames: full_name = os.path.join(dirname, file) if os.path.islink(full_name): fnames.remove(file) elif (os.path.isfile(full_name) and os.access(full_name, os.R_OK)): f = open(full_name, "r") cache[full_name.replace(os.path.join(self.dir, ''), '')] = f.read().rstrip() f.close() os.path.walk(self.dir, add_to_cache, self.cache) class VServer: INITSCRIPTS = [('/etc/rc.vinit', 'start'), ('/etc/rc.d/rc', '%(runlevel)d')] def __init__(self, name, vm_id = None, vm_running = None, logfile=None): self.name = name self.dir = "%s/%s" % (vserverimpl.VSERVER_BASEDIR, name) if not (os.path.isdir(self.dir) and os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)): raise NoSuchVServer, "no such vserver: " + name self.config = VServerConfig(name, "/etc/vservers/%s" % name) self.remove_caps = ~vserverimpl.CAP_SAFE; if vm_id == None: vm_id = int(self.config.get('context')) self.ctx = vm_id if vm_running == None: vm_running = self.is_running() self.vm_running = vm_running self.logfile = logfile # inspired from nodemanager's logger def log(self,msg): if self.logfile: try: fd = os.open(self.logfile,os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0600) if not msg.endswith('\n'): msg += '\n' os.write(fd, '%s: %s' % (time.asctime(time.gmtime()), msg)) os.close(fd) except: print '%s: (%s failed to open) %s'%(time.asctime(time.gmtime()),self.logfile,msg) def set_rlimit(self, type, hard, soft, min): """Generic set resource limit function for vserver""" global RLIMITS update = False if hard <> VC_LIM_KEEP: self.config.update('rlimits/%s.hard' % type.lower(), hard) update = True if soft <> VC_LIM_KEEP: self.config.update('rlimits/%s.soft' % type.lower(), soft) update = True if min <> VC_LIM_KEEP: self.config.update('rlimits/%s.min' % type.lower(), min) update = True if self.is_running() and update: resource_type = RLIMITS[type] try: vserverimpl.setrlimit(self.ctx, resource_type, hard, soft, min) except OSError, e: self.log("Error: setrlimit(%d, %s, %d, %d, %d): %s" % (self.ctx, type.lower(), hard, soft, min)) return update def set_capabilities(self, capabilities): return vserverimpl.setbcaps(self.ctx, vserverimpl.text2bcaps(capabilities)) def set_capabilities_config(self, capabilities): self.config.update('bcapabilities', capabilities) self.set_capabilities(capabilities) def get_capabilities(self): return vserverimpl.bcaps2text(vserverimpl.getbcaps(self.ctx)) def get_capabilities_config(self): return self.config.get('bcapabilities', '') def set_ipaddresses(self, addresses): vserverimpl.netremove(self.ctx, "all") for a in addresses.split(","): vserverimpl.netadd(self.ctx, a) def set_ipaddresses_config(self, addresses): i = 0 for a in addresses.split(","): self.config.update("interfaces/%d/ip" % i, a) i += 1 while self.config.unset("interfaces/%d/ip" % i): i += 1 self.set_ipaddresses(addresses) def get_ipaddresses_config(self): i = 0 ret = [] while True: r = self.config.get("interfaces/%d/ip" % i, '') if r == '': break ret += [r] i += 1 return ",".join(ret) def get_ipaddresses(self): # No clean way to do this right now. return None def __do_chroot(self): os.chroot(self.dir) os.chdir("/") def chroot_call(self, fn, *args): cwd_fd = os.open(".", os.O_RDONLY) try: root_fd = os.open("/", os.O_RDONLY) try: self.__do_chroot() result = fn(*args) finally: os.fchdir(root_fd) os.chroot(".") os.fchdir(cwd_fd) os.close(root_fd) finally: os.close(cwd_fd) return result def set_disklimit(self, block_limit): # block_limit is in kB if block_limit == 0: try: vserverimpl.unsetdlimit(self.dir, self.ctx) except OSError, e: self.log("Unexpected error with unsetdlimit for context %d" % self.ctx) return if self.vm_running: block_usage = vserverimpl.DLIMIT_KEEP inode_usage = vserverimpl.DLIMIT_KEEP else: # init_disk_info() must have been called to get usage values block_usage = self.disk_blocks inode_usage = self.disk_inodes try: vserverimpl.setdlimit(self.dir, self.ctx, block_usage, block_limit, inode_usage, vserverimpl.DLIMIT_INF, # inode limit 2) # %age reserved for root except OSError, e: self.log("Unexpected error with setdlimit for context %d" % self.ctx) self.config.update('dlimits/0/space_total', block_limit) def is_running(self): return vserverimpl.isrunning(self.ctx) def get_disklimit(self): try: (self.disk_blocks, block_limit, self.disk_inodes, inode_limit, reserved) = vserverimpl.getdlimit(self.dir, self.ctx) except OSError, ex: if ex.errno != errno.ESRCH: raise # get here if no vserver disk limit has been set for xid block_limit = -1 return block_limit def set_sched_config(self, cpu_min, cpu_share): """ Write current CPU scheduler parameters to the vserver configuration file. This method does not modify the kernel CPU scheduling parameters for this context. """ self.config.update('sched/fill-rate', cpu_min) self.config.update('sched/fill-rate2', cpu_share) if cpu_share == 0: self.config.unset('sched/idle-time') if self.is_running(): self.set_sched(cpu_min, cpu_share) def set_sched(self, cpu_min, cpu_share): """ Update kernel CPU scheduling parameters for this context. """ vserverimpl.setsched(self.ctx, cpu_min, cpu_share) def get_sched(self): # have no way of querying scheduler right now on a per vserver basis return (-1, False) def set_bwlimit(self, minrate = bwlimit.bwmin, maxrate = None, exempt_min = None, exempt_max = None, share = None, dev = "eth0"): if minrate is None: bwlimit.off(self.ctx, dev) else: bwlimit.on(self.ctx, dev, share, minrate, maxrate, exempt_min, exempt_max) def get_bwlimit(self, dev = "eth0"): result = bwlimit.get(self.ctx) # result of bwlimit.get is (ctx, share, minrate, maxrate) if result: result = result[1:] return result def open(self, filename, mode = "r", bufsize = -1): return self.chroot_call(open, filename, mode, bufsize) def __do_chcontext(self, state_file): if state_file: print >>state_file, "%u" % self.ctx state_file.close() if vserverimpl.chcontext(self.ctx, vserverimpl.text2bcaps(self.get_capabilities_config())): self.set_resources() vserverimpl.setup_done(self.ctx) def __prep(self, runlevel): """ Perform all the crap that the vserver script does before actually executing the startup scripts. """ # remove /var/run and /var/lock/subsys files # but don't remove utmp from the top-level /var/run RUNDIR = "/var/run" LOCKDIR = "/var/lock/subsys" filter_fn = lambda fs: filter(lambda f: f != 'utmp', fs) garbage = reduce((lambda (out, ff), (dir, subdirs, files): (out + map((dir + "/").__add__, ff(files)), lambda fs: fs)), list(os.walk(RUNDIR)), ([], filter_fn))[0] garbage += filter(os.path.isfile, map((LOCKDIR + "/").__add__, os.listdir(LOCKDIR))) if False: for f in garbage: os.unlink(f) # set the initial runlevel vserverimpl.setrunlevel(RUNDIR + "/utmp", runlevel) # mount /proc and /dev/pts self.__do_mount("none", self.dir, "/proc", "proc") # XXX - magic mount options self.__do_mount("none", self.dir, "/dev/pts", "devpts", 0, "gid=5,mode=0620") def __do_mount(self, *mount_args): try: vserverimpl.mount(*mount_args) except OSError, ex: if ex.errno == errno.EBUSY: # assume already mounted return raise ex def enter(self): self.config.cache_it() self.__do_chroot() self.__do_chcontext(None) def start(self, wait, runlevel = 3): self.vm_running = True child_pid = os.fork() if child_pid == 0: # child process try: # get a new session os.setsid() # open state file to record vserver info state_file = open("/var/run/vservers/%s" % self.name, "w") # use /dev/null for stdin, /var/log/boot.log for stdout/err fd = os.open("/dev/null", os.O_RDONLY) if fd != 0: os.dup2(fd, 0) os.close(fd) # perform pre-init cleanup self.__prep(runlevel) self.config.cache_it() self.__do_chroot() log = open("/var/log/boot.log", "a", 0) if log.fileno() != 1: os.dup2(log.fileno(), 1) os.dup2(1, 2) print >>log, ("%s: starting the virtual server %s" % (time.asctime(time.gmtime()), self.name)) # execute each init script in turn # XXX - we don't support all scripts that vserver script does self.__do_chcontext(state_file) for cmd in self.INITSCRIPTS: try: # enter vserver context arg_subst = { 'runlevel': runlevel } cmd_args = [cmd[0]] + map(lambda x: x % arg_subst, cmd[1:]) print >>log, "executing '%s'" % " ".join(cmd_args) os.spawnvp(os.P_NOWAIT,cmd[0],cmd_args) except: print >>log, traceback.format_exc() os._exit(1) # we get here due to an exception in the top-level child process except Exception, ex: self.log(traceback.format_exc()) os._exit(0) # parent process return child_pid def set_resources(self): """ Called when vserver context is entered for first time, should be overridden by subclass. """ pass def init_disk_info(self): cmd = "/usr/sbin/vdu --script --space --inodes --blocksize 1024 --xid %d %s" % (self.ctx, self.dir) p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) p.stdin.close() line = p.stdout.readline() if not line: sys.stderr.write(p.stderr.read()) p.stdout.close() p.stderr.close() ret = p.wait() (space, inodes) = line.split() self.disk_inodes = int(inodes) self.disk_blocks = int(space) #(self.disk_inodes, self.disk_blocks) = vduimpl.vdu(self.dir) return self.disk_blocks * 1024 def stop(self, signal = signal.SIGKILL): vserverimpl.killall(self.ctx, signal) self.vm_running = False def create(vm_name, static = False, ctor = VServer): options = ['vuseradd'] if static: options += ['--static'] ret = os.spawnvp(os.P_WAIT, 'vuseradd', options + [vm_name]) if not os.WIFEXITED(ret) or os.WEXITSTATUS(ret) != 0: out = "system command ('%s') " % options if os.WIFEXITED(ret): out += "failed, rc = %d" % os.WEXITSTATUS(ret) else: out += "killed by signal %d" % os.WTERMSIG(ret) raise SystemError, out vm_id = pwd.getpwnam(vm_name)[2] return ctor(vm_name, vm_id)