X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=nodemanager.py;h=7f5e204874eaf7ac9b8acf875b52ba23693d1ed0;hb=9e6b9c1ea9e020c55c85b433bac47231d63e9ffd;hp=386776fb3aab820fa26f116eef2b98504698fe9a;hpb=22d40df4ed31c001fd58966640ed0c5079d486e6;p=nodemanager.git diff --git a/nodemanager.py b/nodemanager.py index 386776f..7f5e204 100755 --- a/nodemanager.py +++ b/nodemanager.py @@ -1,8 +1,5 @@ #!/usr/bin/python # -# $Id$ -# $URL$ -# # Useful information can be found at https://svn.planet-lab.org/wiki/NodeManager # @@ -18,31 +15,29 @@ import xmlrpclib import socket import os import sys -import resource import glob import pickle +import random +import resource import logger import tools from config import Config -from plcapi import PLCAPI -import random +from plcapi import PLCAPI class NodeManager: - id="$Id$" - PLUGIN_PATH = "/usr/share/NodeManager/plugins" DB_FILE = "/var/lib/nodemanager/getslivers.pickle" # the modules in this directory that need to be run # NOTE: modules listed here will also be loaded in this order - # once loaded, they get re-ordered after their priority (lower comes first) + # once loaded, they get re-ordered after their priority (lower comes first) # for determining the runtime order - core_modules=['net','conf_files', 'sm', 'bwmon'] + core_modules=['net', 'conf_files', 'slivermanager', 'bwmon'] default_period=600 default_random=301 @@ -51,16 +46,20 @@ class NodeManager: def __init__ (self): parser = optparse.OptionParser() - parser.add_option('-d', '--daemon', action='store_true', dest='daemon', default=False, help='run daemonized') - parser.add_option('-s', '--startup', action='store_true', dest='startup', default=False, help='run all sliver startup scripts') - parser.add_option('-f', '--config', action='store', dest='config', default='/etc/planetlab/plc_config', help='PLC configuration file') - parser.add_option('-k', '--session', action='store', dest='session', default='/etc/planetlab/session', help='API session key (or file)') - parser.add_option('-p', '--period', action='store', dest='period', default=NodeManager.default_period, + parser.add_option('-d', '--daemon', action='store_true', dest='daemon', default=False, + help='run daemonized') + parser.add_option('-f', '--config', action='store', dest='config', default='/etc/planetlab/plc_config', + help='PLC configuration file') + parser.add_option('-k', '--session', action='store', dest='session', default='/etc/planetlab/session', + help='API session key (or file)') + parser.add_option('-p', '--period', action='store', dest='period', default=NodeManager.default_period, help='Polling interval (sec) - default %d'%NodeManager.default_period) - parser.add_option('-r', '--random', action='store', dest='random', default=NodeManager.default_random, + parser.add_option('-r', '--random', action='store', dest='random', default=NodeManager.default_random, help='Range for additional random polling interval (sec) -- default %d'%NodeManager.default_random) - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, help='more verbose log') - parser.add_option('-P', '--path', action='store', dest='path', default=NodeManager.PLUGIN_PATH, help='Path to plugins directory') + parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, + help='more verbose log') + parser.add_option('-P', '--path', action='store', dest='path', default=NodeManager.PLUGIN_PATH, + help='Path to plugins directory') # NOTE: BUG the 'help' for this parser.add_option() wont list plugins from the --path argument parser.add_option('-m', '--module', action='store', dest='user_module', default='', help='run a single module') @@ -75,7 +74,10 @@ class NodeManager: # Deal with plugins directory if os.path.exists(self.options.path): sys.path.append(self.options.path) - plugins = [ os.path.split(os.path.splitext(x)[0])[1] for x in glob.glob( os.path.join(self.options.path,'*.py') ) ] + plugins = [ os.path.split(os.path.splitext(x)[0])[1] + for x in glob.glob( os.path.join(self.options.path,'*.py') ) + if not x.endswith("/__init__.py") + ] self.modules += plugins if self.options.user_module: assert self.options.user_module in self.modules @@ -84,8 +86,8 @@ class NodeManager: def GetSlivers(self, config, plc): - """Run call backs defined in modules""" - try: + """Retrieves GetSlivers at PLC and triggers callbacks defined in modules/plugins""" + try: logger.log("nodemanager: Syncing w/ PLC") # retrieve GetSlivers from PLC data = plc.GetSlivers() @@ -93,13 +95,13 @@ class NodeManager: self.getPLCDefaults(data, config) # tweak the 'vref' attribute from GetSliceFamily self.setSliversVref (data) - # log it for debug purposes, no matter what verbose is - logger.log_slivers(data) # dump it too, so it can be retrieved later in case of comm. failure self.dumpSlivers(data) + # log it for debug purposes, no matter what verbose is + logger.log_slivers(data) logger.verbose("nodemanager: Sync w/ PLC done") last_data=data - except: + except: logger.log_exc("nodemanager: failed in GetSlivers") # XXX So some modules can at least boostrap. logger.log("nodemanager: Can't contact PLC to GetSlivers(). Continuing.") @@ -108,14 +110,14 @@ class NodeManager: last_data=self.loadSlivers() # Invoke GetSlivers() functions from the callback modules for module in self.loaded_modules: - logger.verbose('triggering GetSlivers callback for module %s'%module.__name__) - try: + logger.verbose('nodemanager: triggering %s.GetSlivers'%module.__name__) + try: callback = getattr(module, 'GetSlivers') module_data=data if getattr(module,'persistent_data',False): module_data=last_data callback(data, config, plc) - except: + except: logger.log_exc("nodemanager: GetSlivers failed to run callback for module %r"%module) @@ -123,17 +125,17 @@ class NodeManager: """ Get PLC wide defaults from _default system slice. Adds them to config class. """ - for slice in data.get('slivers'): + for slice in data.get('slivers'): if slice['name'] == config.PLC_SLICE_PREFIX+"_default": attr_dict = {} - for attr in slice.get('attributes'): attr_dict[attr['tagname']] = attr['value'] + for attr in slice.get('attributes'): attr_dict[attr['tagname']] = attr['value'] if len(attr_dict): logger.verbose("nodemanager: Found default slice overrides.\n %s" % attr_dict) config.OVERRIDES = attr_dict return # NOTE: if an _default slice existed, it would have been found above and - # the routine would return. Thus, if we've gotten here, then no default - # slice is bound to this node. + # the routine would return. Thus, if we've gotten here, then no default + # slice is bound to this node. if 'OVERRIDES' in dir(config): del config.OVERRIDES @@ -143,23 +145,23 @@ class NodeManager: """ # GetSlivers exposes the result of GetSliceFamily() as an separate key in data # It is safe to override the attributes with this, as this method has the right logic - for sliver in data.get('slivers'): + for sliver in data.get('slivers'): try: slicefamily=sliver.get('GetSliceFamily') for att in sliver['attributes']: - if att['tagname']=='vref': + if att['tagname']=='vref': att['value']=slicefamily continue sliver['attributes'].append({ 'tagname':'vref','value':slicefamily}) except: logger.log_exc("nodemanager: Could not overwrite 'vref' attribute from 'GetSliceFamily'",name=sliver['name']) - + def dumpSlivers (self, slivers): f = open(NodeManager.DB_FILE, "w") logger.log ("nodemanager: saving successfully fetched GetSlivers in %s" % NodeManager.DB_FILE) pickle.dump(slivers, f) f.close() - + def loadSlivers (self): try: f = open(NodeManager.DB_FILE, "r+") @@ -170,59 +172,70 @@ class NodeManager: except: logger.log("Could not restore GetSlivers from %s" % NodeManager.DB_FILE) return {} - + def run(self): + # make sure to create /etc/planetlab/virt so others can read that + # used e.g. in vsys-scripts's sliceip + tools.get_node_virt() try: if self.options.daemon: tools.daemon() - + # set log level if (self.options.verbose): logger.set_level(logger.LOG_VERBOSE) - + tools.init_signals() + # Load /etc/planetlab/plc_config config = Config(self.options.config) - + try: other_pid = tools.pid_file() if other_pid != None: - print """There might be another instance of the node manager running as pid %d. If this is not the case, please remove the pid file %s""" % (other_pid, tools.PID_FILE) + print """There might be another instance of the node manager running as pid %d. +If this is not the case, please remove the pid file %s. -- exiting""" % (other_pid, tools.PID_FILE) return except OSError, err: print "Warning while writing PID file:", err - + # load modules self.loaded_modules = [] for module in self.modules: try: m = __import__(module) - m.start(self.options, config) + logger.verbose("nodemanager: triggering %s.start"%m.__name__) + try: m.start() + except: logger.log("WARNING: module %s did not start") self.loaded_modules.append(m) - except ImportError, err: - print "Warning while loading module %s:" % module, err - + except: + if module not in NodeManager.core_modules: + logger.log_exc ("ERROR while loading module %s - skipped" % module) + else: + logger.log("FATAL : failed to start core module %s"%module) + sys.exit(1) + # sort on priority (lower first) def sort_module_priority (m1,m2): return getattr(m1,'priority',NodeManager.default_priority) - getattr(m2,'priority',NodeManager.default_priority) self.loaded_modules.sort(sort_module_priority) - + logger.log('ordered modules:') - for module in self.loaded_modules: + for module in self.loaded_modules: logger.log ('%s: %s'%(getattr(module,'priority',NodeManager.default_priority),module.__name__)) - + # Load /etc/planetlab/session if os.path.exists(self.options.session): session = file(self.options.session).read().strip() else: session = None - - + + # get random periods iperiod=int(self.options.period) irandom=int(self.options.random) - + # Initialize XML-RPC client plc = PLCAPI(config.plc_api_uri, config.cacert, session, timeout=iperiod/2) - + #check auth logger.log("nodemanager: Checking Auth.") while plc.check_authentication() != True: @@ -233,23 +246,29 @@ class NodeManager: logger.log("nodemanager: Retry Failed. (%r); Waiting.."%e) time.sleep(iperiod) logger.log("nodemanager: Authentication Succeeded!") - - + + while True: # Main nodemanager Loop + work_beg=time.time() logger.log('nodemanager: mainloop - calling GetSlivers - period=%d random=%d'%(iperiod,irandom)) self.GetSlivers(config, plc) delay=iperiod + random.randrange(0,irandom) - logger.log('nodemanager: mainloop - sleeping for %d s'%delay) + work_end=time.time() + work_duration=int(work_end-work_beg) + logger.log('nodemanager: mainloop has worked for %s s - sleeping for %d s'%(work_duration,delay)) time.sleep(delay) - except: logger.log_exc("nodemanager: failed in run") + except SystemExit: + pass + except: + logger.log_exc("nodemanager: failed in run") def run(): - logger.log("======================================== Entering nodemanager.py "+NodeManager.id) + logger.log("======================================== Entering nodemanager.py") NodeManager().run() - + if __name__ == '__main__': run() else: - # This is for debugging purposes. Open a copy of Python and import nm + # This is for debugging purposes. Open a copy of Python and import nodemanager tools.as_daemon_thread(run)