2 A very simple logger that tries to be concurrency-safe.
5 # pylint: disable=c0111
14 LOG_FILE = '/var/log/nodemanager'
15 LOG_SLIVERS = '/var/lib/nodemanager/getslivers.txt'
16 LOG_DATABASE = '/var/lib/nodemanager/database.txt'
18 # basically define 3 levels
22 # default is to log a reasonable amount of stuff for when running on operational nodes
27 if level in (LOG_NONE, LOG_NODE, LOG_VERBOSE):
30 log("Failed to set LOG_LEVEL to %s" % level)
33 log('(v) ' + msg, LOG_VERBOSE)
35 def log(msg, level=LOG_NODE):
37 Write <msg> to the log file if level >= current log level (default LOG_NODE).
42 fd = os.open(LOG_FILE, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o600)
43 if not msg.endswith('\n'):
45 to_write = '%s: %s' % (time.asctime(time.gmtime()), msg)
46 os.write(fd, to_write.encode())
53 def log_exc(msg="", name=None):
54 """Log traceback resulting from an exception."""
57 printout += "%s: "%name
58 printout += "EXCEPTION caught <%s> \n" % msg
59 for frame in traceback.format_exc().split("\n"):
60 printout += (date_width+2)*" " + "%s\n" % frame
63 def log_trace(msg="", name=None):
64 """Log current stack"""
67 printout += "%s: " % name
68 printout += "LOGTRACE\n"
69 for frame in traceback.format_stack():
70 printout += "..." + frame
74 ########## snapshot data to a file
75 # for some reason the various modules are still triggered even when the
76 # data from PLC cannot be reached
77 # we show this message instead of the exception stack instead in this case
78 def log_missing_data(msg, key):
79 log("%s: could not find the %s key in data (PLC connection down?) - IGNORED"%(msg, key))
81 def log_data_in_file(data, file, message="", level=LOG_NODE):
86 with open(file, 'w') as f:
87 now=time.strftime("Last update: %Y.%m.%d at %H:%M:%S %Z", time.localtime())
89 if message: f.write('Message:'+message+'\n')
90 pp=pprint.PrettyPrinter(stream=f, indent=2)
93 verbose("logger:.log_data_in_file Owerwrote %s"%file)
95 log_exc('logger.log_data_in_file failed - file=%s - message=%r'%(file, message))
97 def log_slivers(data):
98 log_data_in_file(data, LOG_SLIVERS, "raw GetSlivers")
100 log_data_in_file(db, LOG_DATABASE, "raw database")
102 #################### child processes
103 # avoid waiting until the process returns;
104 # that makes debugging of hanging children hard
107 def __init__(self, message='log_call: '):
109 self.message = message
118 log(self.message + self.buffer)
121 # time out in seconds - avoid hanging subprocesses - default is 5 minutes
122 default_timeout_minutes = 5
124 # returns a bool that is True when everything goes fine and the retcod is 0
125 def log_call(command, timeout=default_timeout_minutes*60, poll=1):
126 message=" ".join(command)
127 log("log_call: running command %s" % message)
128 verbose("log_call: timeout=%r s" % timeout)
129 verbose("log_call: poll=%r s" % poll)
130 trigger=time.time()+timeout
133 child = subprocess.Popen(command, bufsize=1,
134 stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
137 # see if anything can be read within the poll interval
138 (r, w, x) = select.select([child.stdout], [], [], poll)
140 buffer.add(child.stdout.read(1))
142 returncode = child.poll()
144 if returncode != None:
146 # child is done and return 0
148 log("log_call:end command (%s) completed" % message)
153 log("log_call:end command (%s) returned with code %d" %(message, returncode))
155 # no : still within timeout ?
156 if time.time() >= trigger:
159 log("log_call:end terminating command (%s) - exceeded timeout %d s"%(message, timeout))
162 log_exc("failed to run command %s" % message)