# Copyright 2005 Princeton University #$Id: vserver.py,v 1.72 2007/08/02 16:01:59 dhozac Exp $ import errno import fcntl import os import re import pwd import signal import sys import time import traceback import subprocess import commands import resource import vserverimpl import cpulimit import plnode.bwlimit as bwlimit from vserverimpl import DLIMIT_INF from vserverimpl import VC_LIM_KEEP from vserverimpl import VC_LIM_INFINITY from vserverimpl import VLIMIT_NSOCK from vserverimpl import VLIMIT_OPENFD from vserverimpl import VLIMIT_ANON from vserverimpl import VLIMIT_SHMEM # # these are the flags taken from the kernel linux/vserver/legacy.h # FLAGS_LOCK = 1 FLAGS_SCHED = 2 # XXX - defined in util-vserver/src/chcontext.c FLAGS_NPROC = 4 FLAGS_PRIVATE = 8 FLAGS_INIT = 16 FLAGS_HIDEINFO = 32 FLAGS_ULIMIT = 64 FLAGS_NAMESPACE = 128 RLIMITS = { "NSOCK": VLIMIT_NSOCK, "OPENFD": VLIMIT_OPENFD, "ANON": VLIMIT_ANON, "SHMEM": VLIMIT_SHMEM} CPU_SHARE_MULT = 1024 # add in the platform supported rlimits for entry in resource.__dict__.keys(): if entry.find("RLIMIT_")==0: k = entry[len("RLIMIT_"):] if not RLIMITS.has_key(k): RLIMITS[k]=resource.__dict__[entry] else: print "WARNING: duplicate RLIMITS key %s" % k class NoSuchVServer(Exception): pass class VServerConfig: def __init__(self, name, directory): self.name = name self.dir = directory self.cache = None if not (os.path.isdir(self.dir) and os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)): raise NoSuchVServer, "%s does not exist" % self.dir def get(self, option, default = None): try: if self.cache: return self.cache[option] else: f = open(os.path.join(self.dir, option), "r") buf = f.read().rstrip() f.close() return buf except: if default is not None: return default else: raise KeyError, "Key %s is not set for %s" % (option, self.name) def update(self, option, value): if self.cache: return try: old_umask = os.umask(0022) filename = os.path.join(self.dir, option) try: os.makedirs(os.path.dirname(filename), 0755) except: pass f = open(filename, 'w') if isinstance(value, list): f.write("%s\n" % "\n".join(value)) else: f.write("%s\n" % value) f.close() os.umask(old_umask) except: raise def unset(self, option): if self.cache: return try: filename = os.path.join(self.dir, option) os.unlink(filename) try: os.removedirs(os.path.dirname(filename)) except: pass return True except: return False def cache_it(self): self.cache = {} def add_to_cache(cache, dirname, fnames): for file in fnames: full_name = os.path.join(dirname, file) if os.path.islink(full_name): fnames.remove(file) elif (os.path.isfile(full_name) and os.access(full_name, os.R_OK)): f = open(full_name, "r") cache[full_name.replace(os.path.join(self.dir, ''), '')] = f.read().rstrip() f.close() os.path.walk(self.dir, add_to_cache, self.cache) class VServer: def __init__(self, name, vm_id = None, vm_running = None, logfile=None): self.name = name self.dir = "%s/%s" % (vserverimpl.VSERVER_BASEDIR, name) if not (os.path.isdir(self.dir) and os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)): raise NoSuchVServer, "no such vserver: " + name self.config = VServerConfig(name, "/etc/vservers/%s" % name) #self.remove_caps = ~vserverimpl.CAP_SAFE; if vm_id == None: vm_id = int(self.config.get('context')) self.ctx = vm_id if vm_running == None: vm_running = self.is_running() self.vm_running = vm_running self.logfile = logfile # inspired from nodemanager's logger def log_in_file (self, fd, msg): if not msg: msg="\n" if not msg.endswith('\n'): msg += '\n' os.write(fd, '%s: %s' % (time.asctime(time.gmtime()), msg)) def log(self,msg): if self.logfile: try: fd = os.open(self.logfile,os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0600) self.log_in_file(fd,msg) os.close(fd) except: print '%s: (%s failed to open) %s'%(time.asctime(time.gmtime()),self.logfile,msg) def set_rlimit(self, type, hard, soft, min): """Generic set resource limit function for vserver""" global RLIMITS update = False if hard <> VC_LIM_KEEP: self.config.update('rlimits/%s.hard' % type.lower(), hard) update = True if soft <> VC_LIM_KEEP: self.config.update('rlimits/%s.soft' % type.lower(), soft) update = True if min <> VC_LIM_KEEP: self.config.update('rlimits/%s.min' % type.lower(), min) update = True if self.is_running() and update: resource_type = RLIMITS[type] try: vserverimpl.setrlimit(self.ctx, resource_type, hard, soft, min) except OSError, e: self.log("Error: setrlimit(%d, %s, %d, %d, %d): %s" % (self.ctx, type.lower(), hard, soft, min, e)) return update def get_prefix_from_capabilities(self, capabilities, prefix): split_caps = capabilities.split(',') return ",".join(["%s" % (c[len(prefix):]) for c in split_caps if c.startswith(prefix.upper()) or c.startswith(prefix.lower())]) def get_bcaps_from_capabilities(self, capabilities): return self.get_prefix_from_capabilities(capabilities, "cap_") def get_ccaps_from_capabilities(self, capabilities): return self.get_prefix_from_capabilities(capabilities, "vxc_") def set_capabilities_config(self, capabilities): bcaps = self.get_bcaps_from_capabilities(capabilities) ccaps = self.get_ccaps_from_capabilities(capabilities) if len(bcaps) > 0: bcaps += "," bcaps += "CAP_NET_RAW" self.config.update('bcapabilities', bcaps) self.config.update('ccapabilities', ccaps) ret = vserverimpl.setbcaps(self.ctx, vserverimpl.text2bcaps(bcaps)) if ret > 0: return ret return vserverimpl.setccaps(self.ctx, vserverimpl.text2ccaps(ccaps)) def get_capabilities(self): bcaps = vserverimpl.bcaps2text(vserverimpl.getbcaps(self.ctx)) ccaps = vserverimpl.ccaps2text(vserverimpl.getccaps(self.ctx)) if bcaps and ccaps: ccaps = "," + ccaps return (bcaps + ccaps) def get_capabilities_config(self): bcaps = self.config.get('bcapabilities', '') ccaps = self.config.get('ccapabilities', '') if bcaps and ccaps: ccaps = "," + ccaps return (bcaps + ccaps) def set_ipaddresses(self, addresses): vserverimpl.netremove(self.ctx, "all") for ip in addresses: vserverimpl.netadd(self.ctx, ip) def set_ipaddresses_config(self, addresses, add_loopback=True): ip_addresses = addresses.split(",") # add looopback interface if not ip_addresses.__contains__("127.0.0.1") and add_loopback: ip_addresses.append("127.0.0.1") i = 0 for ip in ip_addresses: self.config.update("interfaces/%d/ip" % i, ip) # create emtpy nodev files to silent "No device specified for" warnings self.config.update("interfaces/%d/nodev" % i, "") i += 1 while self.config.unset("interfaces/%d/ip" % i) and self.config.update("interfaces/%d/nodev" % i, ""): i += 1 self.set_ipaddresses(ip_addresses) def get_ipaddresses_config(self): i = 0 ret = [] while True: r = self.config.get("interfaces/%d/ip" % i, '') if r == '': break ret += [r] i += 1 return ",".join(ret) def get_ipaddresses(self): # No clean way to do this right now. self.log("Calling Vserver.get_ipaddresses for slice %s" % self.name) return None def __do_chroot(self): os.chroot(self.dir) os.chdir("/") def chroot_call(self, fn, *args, **kwargs): cwd_fd = os.open(".", os.O_RDONLY) try: root_fd = os.open("/", os.O_RDONLY) try: self.__do_chroot() result = fn(*args, **kwargs) finally: os.fchdir(root_fd) os.chroot(".") os.fchdir(cwd_fd) os.close(root_fd) finally: os.close(cwd_fd) return result def set_disklimit(self, block_limit): # block_limit is in kB if block_limit == 0: try: vserverimpl.unsetdlimit(self.dir, self.ctx) except OSError, e: self.log("Unexpected error with unsetdlimit for context %d" % self.ctx) return if self.vm_running: block_usage = vserverimpl.DLIMIT_KEEP inode_usage = vserverimpl.DLIMIT_KEEP else: # init_disk_info() must have been called to get usage values block_usage = self.disk_blocks inode_usage = self.disk_inodes try: vserverimpl.setdlimit(self.dir, self.ctx, block_usage, block_limit, inode_usage, vserverimpl.DLIMIT_INF, # inode limit 2) # %age reserved for root except OSError, e: self.log("Unexpected error with setdlimit for context %d" % self.ctx) self.config.update('dlimits/0/space_total', block_limit) def is_running(self): status = subprocess.call(["/usr/sbin/vserver", self.name, "running"], shell=False) return not status def get_disklimit(self): try: (self.disk_blocks, block_limit, self.disk_inodes, inode_limit, reserved) = vserverimpl.getdlimit(self.dir, self.ctx) except OSError, ex: if ex.errno != errno.ESRCH: raise # get here if no vserver disk limit has been set for xid block_limit = -1 return block_limit def set_sched_config(self, cpu_min, cpu_share): """ Write current CPU scheduler parameters to the vserver configuration file. Currently, 'cpu_min' is not supported. """ self.config.update('cgroup/cpu.shares', int(cpu_share) * CPU_SHARE_MULT) if self.is_running(): self.set_sched(cpu_min, cpu_share) def set_sched(self, cpu_min, cpu_share): """ Update kernel CPU scheduling parameters for this context. Currently, 'cpu_min' is not supported. """ try: cgroup = open('/dev/cgroup/%s/cpu.shares' % self.name, 'w') cgroup.write('%s' % (int(cpu_share) * CPU_SHARE_MULT)) cgroup.close() except: pass def get_sched(self): try: cpu_share = int(int(self.config.get('cgroup/cpu.shares')) / CPU_SHARE_MULT) except: cpu_share = False return (-1, cpu_share) def set_bwlimit(self, minrate = bwlimit.bwmin, maxrate = None, exempt_min = None, exempt_max = None, share = None, dev = "eth0"): if minrate is None: bwlimit.off(self.ctx, dev) else: bwlimit.on(self.ctx, dev, share, minrate, maxrate, exempt_min, exempt_max) def get_bwlimit(self, dev = "eth0"): result = bwlimit.get(self.ctx) # result of bwlimit.get is (ctx, share, minrate, maxrate) if result: result = result[1:] return result def open(self, filename, mode = "r", bufsize = -1): return self.chroot_call(open, filename, mode, bufsize) def enter(self): subprocess.call("/usr/sbin/vserver %s enter" % self.name, shell=True) # 2010 June 21 - Thierry # the slice initscript now gets invoked through rc - see sliver_vs.py in nodemanager # and, rc is triggered as part of vserver .. start # so we don't have to worry about initscripts at all anymore here def start(self, runlevel = 3): if os.fork() != 0: # Parent should just return. self.vm_running = True return else: os.setsid() # first child process: fork again if os.fork() != 0: os._exit(0) # Exit parent (the first child) of the second child. # the grandson is the working one os.chdir('/') os.umask(0022) try: # start the vserver subprocess.call(["/usr/sbin/vserver",self.name,"start"]) # we get here due to an exception in the grandson process except Exception, ex: self.log(traceback.format_exc()) os._exit(0) def set_resources(self): """ Called when vserver context is entered for first time, should be overridden by subclass. """ pass def init_disk_info(self): try: dlimit = vserverimpl.getdlimit(self.dir, self.ctx) self.disk_blocks = dlimit[0] self.disk_inodes = dlimit[2] return self.disk_blocks * 1024 except Exception, e: pass cmd = "/usr/sbin/vdu --script --space --inodes --blocksize 1024 --xid %d %s" % (self.ctx, self.dir) p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) p.stdin.close() line = p.stdout.readline() if not line: sys.stderr.write(p.stderr.read()) p.stdout.close() p.stderr.close() ret = p.wait() (space, inodes) = line.split() self.disk_inodes = int(inodes) self.disk_blocks = int(space) return self.disk_blocks * 1024 def stop(self, signal = signal.SIGKILL): self.vm_running = False subprocess.call("/usr/sbin/vserver %s stop" % self.name, shell=True) def setname(self, slice_id): pass def getname(self): '''Get vcVHI_CONTEXT field in kernel''' return vserverimpl.getname(self.ctx) def create(vm_name, static = False, ctor = VServer): options = ['vuseradd'] if static: options += ['--static'] ret = os.spawnvp(os.P_WAIT, 'vuseradd', options + [vm_name]) if not os.WIFEXITED(ret) or os.WEXITSTATUS(ret) != 0: out = "system command ('%s') " % options if os.WIFEXITED(ret): out += "failed, rc = %d" % os.WEXITSTATUS(ret) else: out += "killed by signal %d" % os.WTERMSIG(ret) raise SystemError, out vm_id = pwd.getpwnam(vm_name)[2] return ctor(vm_name, vm_id)