X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=logger.py;h=b38c872b13760715c5c7e5ae23fd37c6e61340ad;hb=ecee05390277f57b02d21ffca0195292bde1defa;hp=eff93e69d6a2b2040f1a30e17ba3e9a99675e34d;hpb=7aa5872239d04b2d0a7cb3b00a9c14b5ea18d013;p=nodemanager.git diff --git a/logger.py b/logger.py index eff93e6..b38c872 100644 --- a/logger.py +++ b/logger.py @@ -1,29 +1,30 @@ -# $Id$ -# $URL$ """A very simple logger that tries to be concurrency-safe.""" import os, sys -import subprocess import time import traceback +import subprocess +import select +LOG_FILE = '/var/log/nodemanager' +LOG_SLIVERS = '/var/lib/nodemanager/getslivers.txt' +LOG_DATABASE = '/var/lib/nodemanager/database.txt' -LOG_FILE = '/var/log/nm' -LOG_SLIVERS = '/var/log/getslivers.txt' - -# Thierry - trying to debug this for 4.2 # basically define 3 levels LOG_NONE=0 LOG_NODE=1 LOG_VERBOSE=2 # default is to log a reasonable amount of stuff for when running on operational nodes -LOG_LEVEL=1 +LOG_LEVEL=LOG_NODE def set_level(level): global LOG_LEVEL - assert level in [LOG_NONE,LOG_NODE,LOG_VERBOSE] - LOG_LEVEL=level + try: + assert level in [LOG_NONE,LOG_NODE,LOG_VERBOSE] + LOG_LEVEL=level + except: + logger.log("Failed to set LOG_LEVEL to %s"%level) def verbose(msg): log('(v) '+msg,LOG_VERBOSE) @@ -41,30 +42,36 @@ def log(msg,level=LOG_NODE): sys.stderr.write(msg) sys.stderr.flush() -def log_call(*args): - log('running command %s' % ' '.join(args)) - try: - child = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) - child.wait() # wait for proc to hang up - if child.returncode: - raise Exception("command failed:\n stdout - %s\n stderr - %s" % \ - (child.stdout.readlines(), child.stderr.readlines())) - except: log_exc('failed to run command %s' % ' '.join(args)) - +date_width=24 def log_exc(msg="",name=None): - """Log the traceback resulting from an exception.""" - if name: - log("%s: EXCEPTION caught <%s> \n %s" %(name, msg, traceback.format_exc())) - else: - log("EXCEPTION caught <%s> \n %s" %(msg, traceback.format_exc())) + """Log traceback resulting from an exception.""" + printout="" + if name: printout += "%s: "%name + printout += "EXCEPTION caught <%s> \n" %msg + for frame in traceback.format_exc().split("\n"): + printout+=(date_width+2)*" "+"%s\n"%frame + log(printout) + +def log_trace(msg="",name=None): + """Log current stack""" + printout="" + if name: printout += "%s: "%name + printout += "LOGTRACE\n" + for frame in traceback.format_stack(): + printout += "..."+frame + log(printout) + +########## snapshot data to a file # for some reason the various modules are still triggered even when the # data from PLC cannot be reached # we show this message instead of the exception stack instead in this case def log_missing_data (msg,key): log("%s: could not find the %s key in data (PLC connection down?) - IGNORED"%(msg,key)) -def log_data_in_file (data, file, message=""): +def log_data_in_file (data, file, message="",level=LOG_NODE): + if (level > LOG_LEVEL): + return import pprint, time try: f=open(file,'w') @@ -74,8 +81,71 @@ def log_data_in_file (data, file, message=""): pp=pprint.PrettyPrinter(stream=f,indent=2) pp.pprint(data) f.close() + verbose("logger:.log_data_in_file Owerwrote %s"%file) except: - log_verbose('log_data_in_file failed - file=%s - message=%r'%(file,message)) + log_exc('logger.log_data_in_file failed - file=%s - message=%r'%(file,message)) def log_slivers (data): log_data_in_file (data, LOG_SLIVERS, "raw GetSlivers") +def log_database (db): + log_data_in_file (db, LOG_DATABASE, "raw database") + +#################### child processes +# avoid waiting until the process returns; +# that makes debugging of hanging children hard + +class Buffer: + def __init__ (self,message='log_call: '): + self.buffer='' + self.message=message + + def add (self,c): + self.buffer += c + if c=='\n': self.flush() + + def flush (self): + if self.buffer: + log (self.message + self.buffer) + self.buffer='' + +# time out in seconds - avoid hanging subprocesses - default is 5 minutes +default_timeout_minutes=5 + +# returns a bool that is True when everything goes fine and the retcod is 0 +def log_call(command,timeout=default_timeout_minutes*60,poll=1): + message=" ".join(command) + log("log_call: running command %s" % message) + verbose("log_call: timeout=%r s" % timeout) + verbose("log_call: poll=%r s" % poll) + trigger=time.time()+timeout + result = False + try: + child = subprocess.Popen(command, bufsize=1, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) + buffer = Buffer() + while True: + # see if anything can be read within the poll interval + (r,w,x)=select.select([child.stdout],[],[],poll) + if r: buffer.add(child.stdout.read(1)) + # is process over ? + returncode=child.poll() + # yes + if returncode != None: + buffer.flush() + # child is done and return 0 + if returncode == 0: + log("log_call:end command (%s) completed" % message) + result=True + break + # child has failed + else: + log("log_call:end command (%s) returned with code %d" %(message,returncode)) + break + # no : still within timeout ? + if time.time() >= trigger: + buffer.flush() + child.terminate() + log("log_call:end terminating command (%s) - exceeded timeout %d s"%(message,timeout)) + break + except: log_exc("failed to run command %s" % message) + return result