From b96a2e5fd3601536d4e1af2701ceee57ad032ee9 Mon Sep 17 00:00:00 2001
From: smbaker
Date: Tue, 8 Mar 2011 16:27:42 -0800
Subject: [PATCH] support for scheduling cores to slices

---
 coresched.py     | 150 +++++++++++++++++++++++++++++++++++++++++++++++
 database.py      |   7 +++
 setup.py         |   1 +
 slivermanager.py |   1 +
 4 files changed, 159 insertions(+)
 create mode 100644 coresched.py

diff --git a/coresched.py b/coresched.py
new file mode 100644
index 0000000..cc0282b
--- /dev/null
+++ b/coresched.py
@@ -0,0 +1,150 @@
+# $Id$
+# $URL$
+
+"""Whole core scheduling
+
+"""
+
+import logger
+import os
+
+class CoreSched:
+    """ Whole-core scheduler
+
+        The main entry point is adjustCores(self, slivers), which takes a
+        dictionary of sliver records. The cpu_cores field is pulled from the
+        effective rspec (rec["_rspec"]) for each sliver.
+
+        If cpu_cores > 0 for a sliver, then that sliver will reserve one or
+        more of the cpu cores on the machine.
+
+        One core is always left unreserved for system slices.
+    """
+
+    def __init__(self):
+        self.cpus = []
+
+    def get_cpus(self):
+        """ return a list of available cpu identifiers: [0,1,2,3...]
+        """
+
+        # the cpus never change, so only compute them once
+        if self.cpus:
+            return self.cpus
+
+        cpuset_cpus = open("/dev/cgroup/cpuset.cpus").readline().strip()
+
+        # cpuset.cpus could be something as arbitrary as:
+        #    0,1,2-3,4,5-6
+        # deal with commas and ranges
+        for part in cpuset_cpus.split(","):
+            cpuRange = part.split("-")
+            if len(cpuRange) == 1:
+                cpuRange = (cpuRange[0], cpuRange[0])
+            for i in range(int(cpuRange[0]), int(cpuRange[1])+1):
+                if i not in self.cpus:
+                    self.cpus.append(i)
+
+        return self.cpus
+
+    def get_cgroups (self):
+        """ return a list of cgroups
+            this might change as vservers are instantiated, so always compute
+            it dynamically.
+        """
+        cgroups = []
+        filenames = os.listdir("/dev/cgroup")
+        for filename in filenames:
+            if os.path.isdir(os.path.join("/dev/cgroup", filename)):
+                cgroups.append(filename)
+        return cgroups
+
+    def adjustCores (self, slivers):
+        """ slivers is a dict of {sliver_name: rec}
+                rec is a dict of attributes
+                    rec['_rspec'] is the effective rspec
+        """
+
+        logger.log("CoreSched: adjusting cores")
+
+        cpus = self.get_cpus()[:]
+
+        reservations = {}
+
+        for name, rec in slivers.iteritems():
+            rspec = rec["_rspec"]
+            cores = rspec.get("cpu_cores", 0)
+            while cores > 0:
+                # always leave one cpu core for best effort and system slices
+                if len(cpus) <= 1:
+                    logger.log("CoreSched: ran out of cpu cores while scheduling: " + name)
+                else:
+                    cpu = cpus.pop()
+                    logger.log("CoreSched: allocating cpu " + str(cpu) + " to slice " + name)
+                    reservations[name] = reservations.get(name, []) + [cpu]
+
+                cores = cores - 1
+
+        # the leftovers go to everyone else
+        logger.log("CoreSched: allocating cpus " + str(cpus) + " to _default")
+        reservations["_default"] = cpus[:]
+
+        self.reserveCores(reservations)
+
+    def reserveCores (self, reservations):
+        """ given a set of reservations (dictionary of slicename:cpuid_list),
+            write those reservations to the appropriate cgroup files.
+
+            reservations["_default"] is assumed to be the default reservation
+            for slices that do not reserve cores. It's essentially the leftover
+            cpu cores.
+        """
+
+        default = reservations["_default"]
+
+        # set the default vserver cpuset. this will deal with any vservers
+        # that might be created before the nodemanager has had a chance to
+        # update the cpusets.
+        self.reserveDefault(default)
+
+        for cgroup in self.get_cgroups():
+            cpus = reservations.get(cgroup, default)
+
+            logger.log("CoreSched: reserving " + cgroup + " " + str(cpus))
+
+            file("/dev/cgroup/" + cgroup + "/cpuset.cpus", "w").write( self.listToRange(cpus) + "\n" )
+
+    def reserveDefault (self, cpus):
+        if not os.path.exists("/etc/vservers/.defaults/cgroup"):
+            os.makedirs("/etc/vservers/.defaults/cgroup")
+
+        file("/etc/vservers/.defaults/cgroup/cpuset.cpus", "w").write( self.listToRange(cpus) + "\n" )
+
+    def listToRange (self, cpus):
+        """ take a list of cpu ids [1,2,3,5,...] and return it as a range string: "1-3,5"
+            for now, just comma-separate; consecutive ids are not collapsed.
+        """
+        return ",".join( [str(i) for i in cpus] )
+
+# a little self-test
+if __name__ == "__main__":
+    x = CoreSched()
+
+    print "cpus:", x.listToRange(x.get_cpus())
+    print "cgroups:", ",".join(x.get_cgroups())
+
+    # a quick self-test for ScottLab slices sl_test1 and sl_test2
+    #   sl_test1 = 1 core
+    #   sl_test2 = 1 core
+
+    rspec_sl_test1 = {"cpu_cores": 1}
+    rec_sl_test1 = {"_rspec": rspec_sl_test1}
+
+    rspec_sl_test2 = {"cpu_cores": 1}
+    rec_sl_test2 = {"_rspec": rspec_sl_test2}
+
+    slivers = {"sl_test1": rec_sl_test1, "sl_test2": rec_sl_test2}
+
+    x.adjustCores(slivers)
+
diff --git a/database.py b/database.py
index dc68af8..020fd25 100644
--- a/database.py
+++ b/database.py
@@ -18,6 +18,7 @@ import threading
 import time
 
 import accounts
+import coresched
 import logger
 import tools
 import bwmon
@@ -118,6 +119,12 @@ It may be necessary in the future to do something smarter."""
 
         self._compute_effective_rspecs()
 
+        try:
+            x = coresched.CoreSched()
+            x.adjustCores(self)
+        except:
+            logger.log_exc("database: exception while doing core sched")
+
         # create and destroy accounts as needed
         logger.verbose("database: sync : fetching accounts")
         existing_acct_names = accounts.all()
diff --git a/setup.py b/setup.py
index bc10f60..c8d4614 100644
--- a/setup.py
+++ b/setup.py
@@ -22,6 +22,7 @@ setup(
         'conf_files',
         'config',
         'controller',
+        'coresched',
         'curlwrapper',
         'database',
         'iptables',
diff --git a/slivermanager.py b/slivermanager.py
index f0a610a..b7c5439 100644
--- a/slivermanager.py
+++ b/slivermanager.py
@@ -29,6 +29,7 @@ DEFAULT_ALLOCATION = {
     # CPU parameters
     'cpu_pct': 0, # percent CPU reserved
     'cpu_share': 1, # proportional share
+    'cpu_cores': 0, # reserved cpu cores
     # bandwidth parameters
     'net_min_rate': bwmin / 1000, # kbps
     'net_max_rate': bwmax / 1000, # kbps
-- 
2.47.0
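
Worked example: on a hypothetical 4-core machine (cpuset.cpus reads "0-3"),
with vserver cgroups for sl_test1 and sl_test2 already present under
/dev/cgroup, the self-test above would produce writes like the following.
cpus.pop() takes cores from the high end, and which slice gets which core
depends on dict iteration order, so the exact pairing shown is illustrative:

    /dev/cgroup/sl_test1/cpuset.cpus           <- "3"
    /dev/cgroup/sl_test2/cpuset.cpus           <- "2"
    /etc/vservers/.defaults/cgroup/cpuset.cpus <- "0,1"
    (every other cgroup under /dev/cgroup is reset to the default "0,1")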
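
A note on listToRange: it currently just comma-separates, so a large default
reservation is written as "0,1,2,...". A possible follow-up, not part of the
patch above, is a version that collapses consecutive cpu ids into
cpuset-style ranges, as the docstring's "1-3,5" example suggests. A minimal
sketch (illustrative only, shown as a free function):

    # Sketch, not part of the patch: collapse consecutive cpu ids into
    # cpuset-style ranges, e.g. [1,2,3,5] -> "1-3,5".
    def listToRange(cpus):
        cpus = sorted(set(cpus))
        parts = []
        i = 0
        while i < len(cpus):
            j = i
            # extend the run while the ids stay consecutive
            while j + 1 < len(cpus) and cpus[j + 1] == cpus[j] + 1:
                j = j + 1
            if i == j:
                parts.append(str(cpus[i]))
            else:
                parts.append(str(cpus[i]) + "-" + str(cpus[j]))
            i = j + 1
        return ",".join(parts)

    print(listToRange([0, 1, 2, 3, 5]))   # prints "0-3,5"

The cpuset filesystem accepts either form, so this would only shorten the
strings written to cpuset.cpus, not change behavior.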