logger.py

   1 """
   2 A very simple logger that tries to be concurrency-safe.
   3 """
   4
   5 # pylint: disable=c0111
   6
   7 import sys
   8 import os
   9 import time
  10 import traceback
  11 import subprocess
  12 import select
  13
  14 LOG_FILE    = '/var/log/nodemanager'
  15 LOG_SLIVERS = '/var/lib/nodemanager/getslivers.txt'
  16 LOG_DATABASE = '/var/lib/nodemanager/database.txt'
  17
  18 # basically define 3 levels
  19 LOG_NONE = 0
  20 LOG_NODE = 1
  21 LOG_VERBOSE = 2
  22 # default is to log a reasonable amount of stuff for when running on operational nodes
  23 LOG_LEVEL = LOG_NODE
  24
  25 def set_level(level):
  26     global LOG_LEVEL
  27     if level in (LOG_NONE, LOG_NODE, LOG_VERBOSE):
  28         LOG_LEVEL = level
  29     else:
  30         log("Failed to set LOG_LEVEL to %s" % level)
  31
  32 def verbose(msg):
  33     log('(v) ' + msg, LOG_VERBOSE)
  34
  35 def log(msg, level=LOG_NODE):
  36     """
  37     Write <msg> to the log file if level >= current log level (default LOG_NODE).
  38     """
  39     if level > LOG_LEVEL:
  40         return
  41     try:
  42         fd = os.open(LOG_FILE, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o600)
  43         if not msg.endswith('\n'):
  44             msg += '\n'
  45         to_write = '%s: %s' % (time.asctime(time.gmtime()), msg)
  46         os.write(fd, to_write.encode())
  47         os.close(fd)
  48     except OSError:
  49         sys.stderr.write(msg)
  50         sys.stderr.flush()
  51
  52 date_width = 24
  53 def log_exc(msg="", name=None):
  54     """Log traceback resulting from an exception."""
  55     printout = ""
  56     if name:
  57         printout += "%s: "%name
  58     printout += "EXCEPTION caught <%s> \n" % msg
  59     for frame in traceback.format_exc().split("\n"):
  60         printout += (date_width+2)*" " + "%s\n" % frame
  61     log(printout)
  62
  63 def log_trace(msg="", name=None):
  64     """Log current stack"""
  65     printout = ""
  66     if name:
  67         printout += "%s: " % name
  68     printout += "LOGTRACE\n"
  69     for frame in traceback.format_stack():
  70         printout += "..." + frame
  71     log(printout)
  72
  73
  74 ########## snapshot data to a file
  75 # for some reason the various modules are still triggered even when the
  76 # data from PLC cannot be reached
  77 # we show this message instead of the exception stack instead in this case
  78 def log_missing_data(msg, key):
  79     log("%s: could not find the %s key in data (PLC connection down?) - IGNORED"%(msg, key))
  80
  81 def log_data_in_file(data, file, message="", level=LOG_NODE):
  82     if level > LOG_LEVEL:
  83         return
  84     import pprint, time
  85     try:
  86         with open(file, 'w') as f:
  87             now=time.strftime("Last update: %Y.%m.%d at %H:%M:%S %Z", time.localtime())
  88             f.write(now+'\n')
  89             if message: f.write('Message:'+message+'\n')
  90             pp=pprint.PrettyPrinter(stream=f, indent=2)
  91             pp.pprint(data)
  92             f.close()
  93             verbose("logger:.log_data_in_file Owerwrote %s"%file)
  94     except:
  95         log_exc('logger.log_data_in_file failed - file=%s - message=%r'%(file, message))
  96
  97 def log_slivers(data):
  98     log_data_in_file(data, LOG_SLIVERS, "raw GetSlivers")
  99 def log_database(db):
 100     log_data_in_file(db, LOG_DATABASE, "raw database")
 101
 102 #################### child processes
 103 # avoid waiting until the process returns;
 104 # that makes debugging of hanging children hard
 105
 106 class Buffer:
 107     def __init__(self, message='log_call: '):
 108         self.buffer = ''
 109         self.message = message
 110
 111     def add(self, c):
 112         self.buffer += c
 113         if c == '\n':
 114             self.flush()
 115
 116     def flush(self):
 117         if self.buffer:
 118             log(self.message + self.buffer)
 119             self.buffer = ''
 120
 121 # time out in seconds - avoid hanging subprocesses - default is 5 minutes
 122 default_timeout_minutes = 5
 123
 124 # returns a bool that is True when everything goes fine and the retcod is 0
 125 def log_call(command, timeout=default_timeout_minutes*60, poll=1):
 126     message=" ".join(command)
 127     log("log_call: running command %s" % message)
 128     verbose("log_call: timeout=%r s" % timeout)
 129     verbose("log_call: poll=%r s" % poll)
 130     trigger=time.time()+timeout
 131     result = False
 132     try:
 133         child = subprocess.Popen(
 134             command, bufsize=1,
 135             stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
 136             close_fds=True,
 137             universal_newlines=True)
 138         buffer = Buffer()
 139         while True:
 140             # see if anything can be read within the poll interval
 141             (r, w, x) = select.select([child.stdout], [], [], poll)
 142             if r:
 143                 buffer.add(child.stdout.read(1))
 144             # is process over ?
 145             returncode = child.poll()
 146             # yes
 147             if returncode != None:
 148                 buffer.flush()
 149                 # child is done and return 0
 150                 if returncode == 0:
 151                     log("log_call:end command (%s) completed" % message)
 152                     result = True
 153                     break
 154                 # child has failed
 155                 else:
 156                     log("log_call:end command (%s) returned with code %d" %(message, returncode))
 157                     break
 158             # no : still within timeout ?
 159             if time.time() >= trigger:
 160                 buffer.flush()
 161                 child.terminate()
 162                 log("log_call:end terminating command (%s) - exceeded timeout %d s"%(message, timeout))
 163                 break
 164     except:
 165         log_exc("failed to run command %s" % message)
 166     return result