# $Id$
# $URL$
#
# Reconstructed from the post-patch side of the logger.py diff
# (nodemanager.git, blob 3411df5 -> dc65bc2).

"""A very simple logger that tries to be concurrency-safe.

Every call to log() opens LOG_FILE in append mode, writes one timestamped
record and closes the file again, so no file descriptor is shared between
calls or inherited by child processes.
"""

import os
import pprint
import select
import subprocess
import sys
import time
import traceback

LOG_FILE = '/var/log/nm'
LOG_SLIVERS = '/var/log/getslivers.txt'

# Thierry - trying to debug this for 4.2
# basically define 3 levels
LOG_NONE = 0
LOG_NODE = 1
LOG_VERBOSE = 2
# default is to log a reasonable amount of stuff for when running on
# operational nodes
LOG_LEVEL = 1

# maximum number of bytes fetched from a child pipe in one read
_READ_CHUNK = 4096


def _to_bytes(text):
    """Return text as a byte string; os.write() only accepts bytes on Python 3."""
    if isinstance(text, bytes):
        return text
    return text.encode('utf-8', 'replace')


def _to_text(data):
    """Return data as a native str; pipe reads yield bytes on Python 3."""
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')


def set_level(level):
    """Set the module-wide log level.

    level must be one of LOG_NONE, LOG_NODE or LOG_VERBOSE.
    """
    global LOG_LEVEL
    assert level in [LOG_NONE, LOG_NODE, LOG_VERBOSE]
    LOG_LEVEL = level


def verbose(msg):
    """Log msg, prefixed with '(v) ', at the LOG_VERBOSE level."""
    log('(v) ' + msg, LOG_VERBOSE)


def log(msg, level=LOG_NODE):
    """Append msg to LOG_FILE unless level is above the current LOG_LEVEL.

    The record is prefixed with the current UTC time and terminated with a
    newline if msg does not already end with one.  If the log file cannot be
    opened or written (OSError), msg goes to stderr instead.
    """
    if level > LOG_LEVEL:
        return
    if not msg.endswith('\n'):
        msg += '\n'
    try:
        fd = os.open(LOG_FILE, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o600)
        try:
            os.write(fd, _to_bytes('%s: %s' % (time.asctime(time.gmtime()), msg)))
        finally:
            # always release the descriptor, even when the write fails
            os.close(fd)
    except OSError:
        sys.stderr.write(msg)
        sys.stderr.flush()


#################### child processes
# avoid waiting until the process returns;
# that makes debugging of hanging children hard

# time out in seconds - avoid hanging subprocesses - default is 5 minutes
default_timeout_minutes = 5


def _log_available(pipes, wait):
    """Log what can be read from pipes within wait seconds.

    Pipes that report end-of-file are removed from the pipes list in place.
    Returns True if any data was logged.
    """
    if not pipes:
        return False
    readable, _, _ = select.select(pipes, [], [], wait)
    got_data = False
    for pipe in readable:
        # os.read returns whatever is available right now; a file-object
        # read() would block until the child closes its end of the pipe
        chunk = os.read(pipe.fileno(), _READ_CHUNK)
        if chunk:
            log(_to_text(chunk))
            got_data = True
        else:
            pipes.remove(pipe)
    return got_data


def log_call(command, timeout=default_timeout_minutes*60, poll=0.3):
    """Run command, copying its stdout and stderr into the log as it runs.

    command is an argument list, as accepted by subprocess.Popen.
    timeout is the number of seconds after which the child is sent
    terminate(); poll is the number of seconds to wait for output between
    two checks of the child's status.

    Failures (command cannot be started, non-zero return code, timeout)
    are not raised to the caller; they are logged through log_exc().
    """
    log('log_call: running command %s' % ' '.join(command))
    verbose('log_call: timeout %r s' % timeout)
    verbose('log_call: poll %r s' % poll)
    trigger = time.time() + timeout
    child = None
    try:
        child = subprocess.Popen(command, stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE, close_fds=True)
        open_pipes = [child.stdout, child.stderr]
        while True:
            if open_pipes:
                # see if anything can be read within the poll interval
                _log_available(open_pipes, poll)
            else:
                # both pipes are at EOF but the child is still alive
                time.sleep(poll)
            # is process over ?
            returncode = child.poll()
            if returncode is not None:
                # pick up output written just before exit, without blocking
                while _log_available(open_pipes, 0):
                    pass
                if returncode == 0:
                    verbose('log_call: command completed %s' % ' '.join(command))
                    break
                raise Exception("log_call: failed with returncode %d" % returncode)
            # no : still within timeout ?
            if time.time() >= trigger:
                child.terminate()
                raise Exception("log_call: terminated command - exceeded timeout %d s" % timeout)
    except Exception:
        log_exc('failed to run command %s' % ' '.join(command))
    finally:
        if child is not None:
            for pipe in (child.stdout, child.stderr):
                try:
                    pipe.close()
                except (IOError, OSError):
                    # best-effort cleanup; nothing useful to do on failure
                    pass


def log_exc(msg="", name=None):
    """Log the traceback resulting from an exception.

    msg is included in the record between angle brackets; when name is
    given, it is used as a prefix for the record.
    """
    record = "EXCEPTION caught <%s> \n %s" % (msg, traceback.format_exc())
    if name:
        record = "%s: %s" % (name, record)
    log(record)


# for some reason the various modules are still triggered even when the
# data from PLC cannot be reached
# we show this message instead of the exception stack in this case
def log_missing_data(msg, key):
    """Log that key could not be found in the data, with msg as a prefix."""
    log("%s: could not find the %s key in data (PLC connection down?) - IGNORED" % (msg, key))


def log_data_in_file(data, file, message=""):
    """Pretty-print data into file, overwriting any previous contents.

    The file starts with a 'Last update' line in local time, followed by
    an optional 'Message:' line when message is not empty.  Failures are
    reported through verbose() rather than raised.
    """
    try:
        with open(file, 'w') as f:
            now = time.strftime("Last update: %Y.%m.%d at %H:%M:%S %Z", time.localtime())
            f.write(now + '\n')
            if message:
                f.write('Message:' + message + '\n')
            pp = pprint.PrettyPrinter(stream=f, indent=2)
            pp.pprint(data)
    except Exception:
        verbose('log_data_in_file failed - file=%s - message=%r' % (file, message))


def log_slivers(data):
    """Dump data into LOG_SLIVERS, labelled as raw GetSlivers."""
    log_data_in_file(data, LOG_SLIVERS, "raw GetSlivers")