#!/usr/bin/python
#
# RunlevelAgent - acts as a heartbeat back to myplc reporting that the node is
# online and whether it is in boot or pre-boot run-level.
# This is useful to identify nodes that are behind a firewall, as well as to
# have the machine report run-time status both in safeboot and boot modes,
# so that it is immediately visible at myplc (gui or api).
#
import xml, xmlrpclib
import logging
return self.api.__repr__()
def extract_from(filename, pattern):
    """Return the lines of `filename` that match `pattern`, via `grep -E`.

    The command is run through the shell, and both arguments are
    interpolated into the command line verbatim.  A pattern containing
    spaces or shell metacharacters must therefore already carry its own
    shell quoting (e.g. "'Current boot state:'").

    Returns grep's standard output with leading and trailing whitespace
    stripped; the result is an empty string when nothing matched.
    """
    pipe = os.popen("grep -E %s %s" % (pattern, filename))
    try:
        return pipe.read().strip()
    finally:
        # Close the pipe explicitly instead of relying on garbage collection
        # to reap the child process.
        pipe.close()
def check_running(commandname):
    """Return the `ps ax` lines that match `commandname`.

    The name is interpolated verbatim into a shell pipeline and used as a
    `grep -E` pattern, so regex metacharacters in it are interpreted by
    grep.  Lines containing "grep" are filtered out so that the pipeline
    does not match itself.

    Returns the matching lines with surrounding whitespace stripped; the
    result is an empty string when no process matched.
    """
    pipe = os.popen("ps ax | grep -E %s | grep -v grep" % (commandname))
    try:
        return pipe.read().strip()
    finally:
        # Close the pipe explicitly instead of relying on garbage collection
        # to reap the child processes.
        pipe.close()
def main():
- f=open(SESSION_FILE,'r')
- session_str=f.read().strip()
- api = PLC(Auth(session=session_str), api_server_url)
- # NOTE: What should we do if this call fails?
- api.AuthCheck()
+ # Keep trying to authenticate session, waiting for NM to re-write the
+ # session file, or DNS to succeed, until AuthCheck succeeds.
+ while True:
+ try:
+ f=open(SESSION_FILE,'r')
+ session_str=f.read().strip()
+ api = PLC(Auth(session=session_str), api_server_url)
+ # NOTE: What should we do if this call fails?
+ # TODO: handle dns failure here.
+ api.AuthCheck()
+ break
+ except:
+ print "Retry in 30 seconds: ", os.popen("uptime").read().strip()
+ traceback.print_exc()
+ time.sleep(30)
try:
env = 'production'
traceback.print_exc()
while True:
- #print "reporting status: ", os.popen("uptime").read().strip()
try:
# NOTE: here we are inferring the runlevel by environmental
- # observations. We know how this process was started by the
- # given command line argument. Then in bootmanager
- # runlevle, the bm.log gives information about the current
- # activity.
- # other options:
- # call plc for current boot state?
- # how long have we been running?
+ # observations. We know how this process was started by the
+ # given command line argument. Then in bootmanager
+ # runlevel, the bm.log gives information about the current
+ # activity.
+ # other options:
+ # call plc for current boot state?
+ # how long have we been running?
if env == "bootmanager":
- bs_val = extract_from('/tmp/bm.log', 'Current boot state:').split()[3]
- ex_val = extract_from('/tmp/bm.log', 'Exception')
- fs_val = extract_from('/tmp/bm.log', 'mke2fs')
- bm_val = check_running("BootManager.py")
+ bs_val = extract_from('/tmp/bm.log', "'Current boot state:'")
+ if len(bs_val) > 0: bs_val = bs_val.split()[-1]
+ ex_val = extract_from('/tmp/bm.log', 'Exception')
+ fs_val = extract_from('/tmp/bm.log', 'mke2fs')
+ bm_val = check_running("BootManager.py")
- if bs_val in ['diag', 'diagnose', 'safeboot', 'disabled', 'disable']:
- api.ReportRunlevel({'run_level' : 'safeboot'})
+ if bs_val in ['diag', 'diagnose', 'safeboot', 'disabled', 'disable']:
+ api.ReportRunlevel({'run_level' : 'safeboot'})
- elif len(ex_val) > len("Exception"):
- api.ReportRunlevel({'run_level' : 'failboot'})
+ elif len(ex_val) > len("Exception"):
+ api.ReportRunlevel({'run_level' : 'failboot'})
- elif len(fs_val) > 0 and len(bm_val) > 0:
- api.ReportRunlevel({'run_level' : 'reinstall'})
+ elif len(fs_val) > 0 and len(bm_val) > 0:
+ api.ReportRunlevel({'run_level' : 'reinstall'})
- else:
- api.ReportRunlevel({'run_level' : 'failboot'})
+ else:
+ api.ReportRunlevel({'run_level' : 'failboot'})
elif env == "production":
api.ReportRunlevel({'run_level' : 'boot'})
api.ReportRunlevel({'run_level' : 'failboot'})
except:
+ print "reporting error: ", os.popen("uptime").read().strip()
traceback.print_exc()
- # TODO: change to a configurable value
sys.stdout.flush()
+ # TODO: change to a configurable value
time.sleep(60*15)
if __name__ == "__main__":