From 8a73c41fa38f330fd125d85d693d1390ea2477a4 Mon Sep 17 00:00:00 2001 From: Thierry Parmentelat Date: Tue, 9 Feb 2010 17:37:04 +0000 Subject: [PATCH] (*) the various modules have a priority; lower gets invoked first (*) default is net, conf_files, specialaccounts, sm, bwmon, then the rest (*) log_call can log child output on the fly even if hangs (*) log_call has timeout and terminates hanging child if too long --- bwmon.py | 2 ++ conf_files.py | 3 +++ controller.py | 4 ++-- logger.py | 48 ++++++++++++++++++++++++++++++-------- net.py | 3 +++ nm.py | 18 ++++++++++++-- plugins/codemux.py | 8 +++---- plugins/specialaccounts.py | 5 ++++ plugins/vsys.py | 2 +- sliver_vs.py | 7 ++++-- sm.py | 2 ++ 11 files changed, 81 insertions(+), 21 deletions(-) diff --git a/bwmon.py b/bwmon.py index 2b13d6f..dad2a86 100644 --- a/bwmon.py +++ b/bwmon.py @@ -33,6 +33,8 @@ import database from sets import Set +priority = 20 + # Defaults # Set DEBUG to True if you don't want to send emails DEBUG = False diff --git a/conf_files.py b/conf_files.py index f3d1eb9..e7b823d 100644 --- a/conf_files.py +++ b/conf_files.py @@ -15,6 +15,9 @@ import tools import xmlrpclib from config import Config +# right after net +priority = 2 + class conf_files: def __init__(self, noscripts=False): self.config = Config() diff --git a/controller.py b/controller.py index 2f2bc0c..42f7514 100644 --- a/controller.py +++ b/controller.py @@ -17,10 +17,10 @@ class Controller(accounts.Account): def create(name, vref = None): add_shell(Controller.SHELL) group = getgrnam("slices")[2] - logger.log_call('/usr/sbin/useradd', '-p', '*', '-g', str(group), '-s', Controller.SHELL, name) + logger.log_call(['/usr/sbin/useradd', '-p', '*', '-g', str(group), '-s', Controller.SHELL, name, ]) @staticmethod - def destroy(name): logger.log_call('/usr/sbin/userdel', '-r', name) + def destroy(name): logger.log_call(['/usr/sbin/userdel', '-r', name, ]) def is_running(self): logger.verbose("controller: is_running: %s" % self.name) diff --git a/logger.py b/logger.py index eff93e6..dc65bc2 100644 --- a/logger.py +++ b/logger.py @@ -4,10 +4,10 @@ """A very simple logger that tries to be concurrency-safe.""" import os, sys -import subprocess import time import traceback - +import subprocess +import select LOG_FILE = '/var/log/nm' LOG_SLIVERS = '/var/log/getslivers.txt' @@ -41,15 +41,43 @@ def log(msg,level=LOG_NODE): sys.stderr.write(msg) sys.stderr.flush() -def log_call(*args): - log('running command %s' % ' '.join(args)) +#################### child processes +# avoid waiting until the process returns; +# that makes debugging of hanging children hard + +# time out in seconds - avoid hanging subprocesses - default is 5 minutes +default_timeout_minutes=5 + +def log_call(command,timeout=default_timeout_minutes*60,poll=0.3): + log('log_call: running command %s' % ' '.join(command)) + verbose('log_call: timeout %r s' % timeout) + verbose('log_call: poll %r s' % poll) + trigger=time.time()+timeout try: - child = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) - child.wait() # wait for proc to hang up - if child.returncode: - raise Exception("command failed:\n stdout - %s\n stderr - %s" % \ - (child.stdout.readlines(), child.stderr.readlines())) - except: log_exc('failed to run command %s' % ' '.join(args)) + child = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) + while True: + # see if anything can be read within the poll interval + (r,w,x)=select.select([child.stdout,child.stderr],[],[],poll) + # read and log it + for fd in r: + input=fd.read() + if input: log(input) + # is process over ? + returncode=child.poll() + # yes + if returncode != None: + # child is done and return 0 + if returncode == 0: + verbose('log_call: command completed %s' % ' '.join(command)) + break + # child has failed + else: + raise Exception("log_call: failed with returncode %d"%returncode) + # no : still within timeout ? + if time.time() >= trigger: + child.terminate() + raise Exception("log_call: terminated command - exceeded timeout %d s"%timeout) + except: log_exc('failed to run command %s' % ' '.join(command)) def log_exc(msg="",name=None): """Log the traceback resulting from an exception.""" diff --git a/net.py b/net.py index 45dee4b..760c1be 100644 --- a/net.py +++ b/net.py @@ -12,6 +12,9 @@ import sioc, plnet # local modules import bwlimit, logger, iptables, tools +# we can't do anything without a network +priority=1 + dev_default = tools.get_default_if() def start(options, conf): diff --git a/nm.py b/nm.py index abc05bb..7236cc8 100755 --- a/nm.py +++ b/nm.py @@ -19,6 +19,7 @@ import socket import os import sys import resource +import glob import logger import tools @@ -31,6 +32,7 @@ id="$Id$" savedargv = sys.argv[:] # NOTE: modules listed here should also be loaded in this order +# see the priority set in each module - lower comes first known_modules=['net','conf_files', 'sm', 'bwmon'] plugin_path = "/usr/share/NodeManager/plugins" @@ -57,7 +59,8 @@ parser.add_option('-m', '--module', action='store', dest='module', default='', h # Deal with plugins directory if os.path.exists(options.path): sys.path.append(options.path) - known_modules += [i[:-3] for i in os.listdir(options.path) if i.endswith(".py") and (i[:-3] not in known_modules)] + plugins = [ os.path.split(os.path.splitext(x)[0])[1] for x in glob.glob( os.path.join(options.path,'*.py') ) ] + known_modules += plugins modules = [] @@ -83,10 +86,12 @@ def GetSlivers(config, plc): data = {} # Invoke GetSlivers() functions from the callback modules for module in modules: +# logger.log('trigerring GetSlivers callback for module %s'%module.__name__) try: callback = getattr(module, 'GetSlivers') callback(data, config, plc) - except: logger.log_exc("nm: GetSlivers failed to run callback for module %r"%module) + except: + logger.log_exc("nm: GetSlivers failed to run callback for module %r"%module) def getPLCDefaults(data, config): @@ -159,6 +164,15 @@ def run(): except ImportError, err: print "Warning while loading module %s:" % module, err + default_priority=100 + # sort on priority (lower first) + def sort_module_priority (m1,m2): + return getattr(m1,'priority',default_priority) - getattr(m2,'priority',default_priority) + modules.sort(sort_module_priority) + + logger.verbose('modules priorities and order:') + for module in modules: logger.verbose ('%s: %s'%(getattr(module,'priority',default_priority),module.__name__)) + # Load /etc/planetlab/session if os.path.exists(options.session): session = file(options.session).read().strip() diff --git a/plugins/codemux.py b/plugins/codemux.py index 471f269..65623f3 100644 --- a/plugins/codemux.py +++ b/plugins/codemux.py @@ -146,18 +146,18 @@ def isRunning(): def restartService(): logger.log("codemux: Restarting codemux service") if isRunning(): - logger.log_call("/etc/init.d/codemux","condrestart") + logger.log_call(["/etc/init.d/codemux","condrestart", ]) else: - logger.log_call("/etc/init.d/codemux","restart") + logger.log_call(["/etc/init.d/codemux","restart", ]) def startService(): if not isRunning(): logger.log("codemux: Starting codemux service") - logger.log_call("/etc/init.d/codemux", "start") + logger.log_call(["/etc/init.d/codemux", "start", ]) def stopService(): if isRunning(): logger.log("codemux: Stopping codemux service") - logger.log_call("/etc/init.d/codemux", "stop") + logger.log_call(["/etc/init.d/codemux", "stop", ]) diff --git a/plugins/specialaccounts.py b/plugins/specialaccounts.py index 1fa15e1..fb33688 100644 --- a/plugins/specialaccounts.py +++ b/plugins/specialaccounts.py @@ -22,6 +22,9 @@ import pwd import logger import tools +# right after conf_files +priority = 3 + def start(options, conf): logger.log("personkeys: plugin starting up...") @@ -34,6 +37,8 @@ def GetSlivers(data, conf = None, plc = None): name = account['name'] new_keys = account['keys'] + logger.log('specialaccounts: dealing with account %s'%name) + # look up account name, which must exist pw_info = pwd.getpwnam(name) uid = pw_info[2] diff --git a/plugins/vsys.py b/plugins/vsys.py index ee806e7..5c9b753 100644 --- a/plugins/vsys.py +++ b/plugins/vsys.py @@ -45,7 +45,7 @@ def GetSlivers(data, config=None, plc=None): # Write out the ACLs if writeAcls(scripts, parseAcls()) or _restart: logger.log("vsys: restarting vsys service") - logger.log_call("/etc/init.d/vsys", "restart") + logger.log_call(["/etc/init.d/vsys", "restart", ]) def createVsysDir(sliver): diff --git a/sliver_vs.py b/sliver_vs.py index 5d487f6..85cbac2 100644 --- a/sliver_vs.py +++ b/sliver_vs.py @@ -106,7 +106,10 @@ class Sliver_VS(accounts.Account, vserver.VServer): personality="linux64" return personality - logger.log_call('/usr/sbin/vuseradd', '-t', vref, name) + # temporary : run vuseradd verbosely - mostly a check for log_call to work properly, + # since the issue with the kernel seems to have been spotted +# logger.log_call(['/usr/sbin/vuseradd', '-t', vref, name, ], timeout=10*60) + logger.log_call(['/bin/bash','-x','/usr/sbin/vuseradd', '-t', vref, name, ], timeout=15*60) # export slicename to the slice in /etc/slicename file('/vservers/%s/etc/slicename' % name, 'w').write(name) file('/vservers/%s/etc/slicefamily' % name, 'w').write(vref) @@ -116,7 +119,7 @@ class Sliver_VS(accounts.Account, vserver.VServer): logger.log('sliver_vs: %s: set personality to %s'%(name,personality(arch))) @staticmethod - def destroy(name): logger.log_call('/usr/sbin/vuserdel', name) + def destroy(name): logger.log_call(['/usr/sbin/vuserdel', name, ]) def configure(self, rec): new_rspec = rec['_rspec'] diff --git a/sm.py b/sm.py index 6192ba1..41f1f23 100644 --- a/sm.py +++ b/sm.py @@ -24,6 +24,8 @@ import sliver_vs import string,re +priority=10 + DEFAULT_ALLOCATION = { 'enabled': 1, # CPU parameters -- 2.43.0