logger.py

   1
   2 """A very simple logger that tries to be concurrency-safe."""
   3
   4 import os, sys
   5 import time
   6 import traceback
   7 import subprocess
   8 import select
   9
  10 LOG_FILE    = '/var/log/nodemanager'
  11 LOG_SLIVERS = '/var/lib/nodemanager/getslivers.txt'
  12 LOG_DATABASE = '/var/lib/nodemanager/database.txt'
  13
  14 # basically define 3 levels
  15 LOG_NONE=0
  16 LOG_NODE=1
  17 LOG_VERBOSE=2
  18 # default is to log a reasonable amount of stuff for when running on operational nodes
  19 LOG_LEVEL=LOG_NODE
  20
  21 def set_level(level):
  22     global LOG_LEVEL
  23     try:
  24         assert level in [LOG_NONE,LOG_NODE,LOG_VERBOSE]
  25         LOG_LEVEL=level
  26     except:
  27         logger.log("Failed to set LOG_LEVEL to %s"%level)
  28
  29 def verbose(msg):
  30     log('(v) '+msg,LOG_VERBOSE)
  31
  32 def log(msg,level=LOG_NODE):
  33     """Write <msg> to the log file if level >= current log level (default LOG_NODE)."""
  34     if (level > LOG_LEVEL):
  35         return
  36     try:
  37         fd = os.open(LOG_FILE, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0600)
  38         if not msg.endswith('\n'): msg += '\n'
  39         os.write(fd, '%s: %s' % (time.asctime(time.gmtime()), msg))
  40         os.close(fd)
  41     except OSError:
  42         sys.stderr.write(msg)
  43         sys.stderr.flush()
  44
  45 def log_exc(msg="",name=None):
  46     """Log traceback resulting from an exception."""
  47     printout=""
  48     if name: printout += "%s: "%name
  49     printout += "EXCEPTION caught <%s> \n %s" %(msg, traceback.format_exc())
  50     log(printout)
  51
  52 def log_trace(msg="",name=None):
  53     """Log current stack"""
  54     printout=""
  55     if name: printout += "%s: "%name
  56     printout += "LOGTRACE\n"
  57     for frame in traceback.format_stack():
  58         printout += "..."+frame
  59     log(printout)
  60
  61
  62 ########## snapshot data to a file
  63 # for some reason the various modules are still triggered even when the
  64 # data from PLC cannot be reached
  65 # we show this message instead of the exception stack instead in this case
  66 def log_missing_data (msg,key):
  67     log("%s: could not find the %s key in data (PLC connection down?) - IGNORED"%(msg,key))
  68
  69 def log_data_in_file (data, file, message="",level=LOG_NODE):
  70     if (level > LOG_LEVEL):
  71         return
  72     import pprint, time
  73     try:
  74         f=open(file,'w')
  75         now=time.strftime("Last update: %Y.%m.%d at %H:%M:%S %Z", time.localtime())
  76         f.write(now+'\n')
  77         if message: f.write('Message:'+message+'\n')
  78         pp=pprint.PrettyPrinter(stream=f,indent=2)
  79         pp.pprint(data)
  80         f.close()
  81         verbose("logger:.log_data_in_file Owerwrote %s"%file)
  82     except:
  83         log_exc('logger.log_data_in_file failed - file=%s - message=%r'%(file,message))
  84
  85 def log_slivers (data):
  86     log_data_in_file (data, LOG_SLIVERS, "raw GetSlivers")
  87 def log_database (db):
  88     log_data_in_file (db, LOG_DATABASE, "raw database")
  89
  90 #################### child processes
  91 # avoid waiting until the process returns;
  92 # that makes debugging of hanging children hard
  93
  94 class Buffer:
  95     def __init__ (self,message='log_call: '):
  96         self.buffer=''
  97         self.message=message
  98
  99     def add (self,c):
 100         self.buffer += c
 101         if c=='\n': self.flush()
 102
 103     def flush (self):
 104         if self.buffer:
 105             log (self.message + self.buffer)
 106             self.buffer=''
 107
 108 # time out in seconds - avoid hanging subprocesses - default is 5 minutes
 109 default_timeout_minutes=5
 110
 111 # returns a bool that is True when everything goes fine and the retcod is 0
 112 def log_call(command,timeout=default_timeout_minutes*60,poll=1):
 113     message=" ".join(command)
 114     log("log_call: running command %s" % message)
 115     verbose("log_call: timeout=%r s" % timeout)
 116     verbose("log_call: poll=%r s" % poll)
 117     trigger=time.time()+timeout
 118     result = False
 119     try:
 120         child = subprocess.Popen(command, bufsize=1,
 121                                  stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
 122         buffer = Buffer()
 123         while True:
 124             # see if anything can be read within the poll interval
 125             (r,w,x)=select.select([child.stdout],[],[],poll)
 126             if r: buffer.add(child.stdout.read(1))
 127             # is process over ?
 128             returncode=child.poll()
 129             # yes
 130             if returncode != None:
 131                 buffer.flush()
 132                 # child is done and return 0
 133                 if returncode == 0:
 134                     log("log_call:end command (%s) completed" % message)
 135                     result=True
 136                     break
 137                 # child has failed
 138                 else:
 139                     log("log_call:end command (%s) returned with code %d" %(message,returncode))
 140                     break
 141             # no : still within timeout ?
 142             if time.time() >= trigger:
 143                 buffer.flush()
 144                 child.terminate()
 145                 log("log_call:end terminating command (%s) - exceeded timeout %d s"%(message,timeout))
 146                 break
 147     except: log_exc("failed to run command %s" % message)
 148     return result