3 # RunlevelAgent - acts as a heartbeat back to myplc reporting that the node is
4 # online and whether it is in boot or pre-boot run-level.
5 # This is useful to identify nodes that are behind a firewall, as well as to
6 # have the machine report run-time status both in safeboot and boot modes,
7 # so that it is immediately visible at myplc (gui or api).
10 from __future__ import print_function
# Configuration file handed to read_config_file() to look up BOOT_API_SERVER.
CONFIG_FILE = "/tmp/source/configuration"
# File whose (stripped) contents are used as the session string for
# session-based authentication against the PLC API.
SESSION_FILE = "/etc/planetlab/session"
# File a PID is written to, and later read back from in order to kill that
# process.
RLA_PID_FILE = "/var/run/rla.pid"
def read_config_file(filename):
    """Parse a ``NAME=VALUE`` style configuration file into a dict.

    Lines that start with ``#`` and lines that contain only whitespace are
    ignored. Every other line is split at its first ``=``; the text on each
    side is stripped and stored as ``name -> value`` (so a value may itself
    contain ``=``).

    A line without any ``=`` is reported on stdout, parsing stops there, and
    "Unable to read configuration vars." is printed; the entries collected
    up to that point are still returned.

    Args:
        filename: path of the configuration file to read.

    Returns:
        dict mapping variable names to their string values.

    Raises:
        IOError/OSError: if the file cannot be opened.
    """
    ## NOTE: text copied from BootManager.py
    # TODO: unify this code to make it common. i.e. use ConfigParser module
    # NOTE(review): the handling after an invalid line (stop parsing, then
    # report) was reconstructed from the two messages below - confirm against
    # BootManager.py.
    config = {}
    valid = True

    # open() + str methods instead of file()/string.*: the latter exist only
    # on Python 2, and the context manager closes the file on every path.
    with open(filename, 'r') as vars_file:
        for line in vars_file:
            # if it's a comment or a whitespace line, ignore
            if line[:1] == "#" or line.strip() == "":
                continue

            name, separator, value = line.partition("=")
            if not separator:
                print("Invalid line in vars file: {}".format(line))
                valid = False
                break

            config[name.strip()] = value.strip()

    if not valid:
        print("Unable to read configuration vars.")

    return config
53 sys.path = ['/etc/planetlab'] + sys.path
55 api_server_url = "https://" + plc_config.PLC_API_HOST + plc_config.PLC_API_PATH
57 filename = CONFIG_FILE
58 vars = read_config_file(filename)
59 api_server_url = vars['BOOT_API_SERVER']
# NOTE(review): the class header was not visible in the reviewed extract; the
# name is taken from the `Auth(session=...)` call site in this file - confirm.
class Auth(object):
    """Holder for the authentication structure sent with PLC API calls.

    The structure is exposed as the ``auth`` attribute (a plain dict).
    """

    def __init__(self, username=None, password=None, **kwargs):
        """Build the ``auth`` dict for one of three authentication methods.

        Args:
            username: user name for password authentication.
            password: password for password authentication.
            **kwargs: if it contains ``session``, session authentication is
                used and ``username``/``password`` are ignored.

        Resulting ``self.auth``:
            * ``session`` given     -> ``AuthMethod`` "session" + the session
            * no username/password  -> ``AuthMethod`` "anonymous"
            * otherwise             -> ``AuthMethod`` "password" with
              ``Username`` and ``AuthString``
        """
        if 'session' in kwargs:
            self.auth = {'AuthMethod': 'session',
                         'session': kwargs['session']}
        elif username is None and password is None:
            self.auth = {'AuthMethod': "anonymous"}
        else:
            self.auth = {'Username': username,
                         'AuthMethod': 'password',
                         'AuthString': password}
# NOTE(review): the class header was not visible in the reviewed extract; the
# name is taken from the `PLC(Auth(...), api_server_url)` call site - confirm.
class PLC(object):
    """XML-RPC proxy that passes the auth structure as first argument.

    Any attribute that is not found on the instance is treated as a remote
    method name: ``plc.Foo(a, b)`` calls ``Foo(auth_dict, a, b)`` on the
    server proxy.
    """

    def __init__(self, auth, url):
        """Create the XML-RPC server proxy.

        Args:
            auth: object whose ``auth`` attribute is the authentication dict
                prepended to every call.
            url: URL of the XML-RPC endpoint.
        """
        self.auth = auth
        self.url = url
        # Using a self signed certificate
        # https://www.python.org/dev/peps/pep-0476/
        # Certificate verification is deliberately disabled when the ssl
        # module supports it; older ssl modules lack the helper entirely.
        if hasattr(ssl, '_create_unverified_context'):
            self.api = xmlrpclib.Server(self.url, verbose=False, allow_none=True,
                                        context=ssl._create_unverified_context())
        else:
            self.api = xmlrpclib.Server(self.url, verbose=False, allow_none=True)

    def __getattr__(self, name):
        """Return a callable that invokes remote method *name* with auth.

        Raises:
            AttributeError: for the instance's own attributes and for
                double-underscore names.
            AssertionError: if the proxy yields no method object.
        """
        # __getattr__ only runs after normal lookup failed. Getting here for
        # one of our own attributes means the instance is not initialised
        # (e.g. created via __new__); reading self.api below would then
        # re-enter this method without end. Dunder probes (copy/pickle hooks)
        # must not be turned into remote calls either.
        if name in ('api', 'auth', 'url') or name.startswith('__'):
            raise AttributeError(name)

        method = getattr(self.api, name)
        # NOTE(review): the guard condition was not visible in the reviewed
        # extract; `is None` is assumed - confirm.
        if method is None:
            raise AssertionError("method does not exist")

        # self.auth.auth is looked up at call time, not at attribute access.
        return lambda *params: method(self.auth.auth, *params)

    # NOTE(review): this method's header was not visible in the reviewed
    # extract; __repr__ is inferred from the delegated call - confirm.
    def __repr__(self):
        """Delegate the representation to the underlying server proxy."""
        return self.api.__repr__()
def extract_from(filename, pattern):
    """Return the lines of *filename* that match *pattern*, via ``grep -E``.

    Both arguments are interpolated into a shell command line unquoted, so
    the caller supplies any shell quoting the pattern needs (callers in this
    file pass patterns such as ``"'Current boot state:'"``). Only pass
    trusted values.

    Args:
        filename: path of the file to search.
        pattern: extended regular expression, already quoted for the shell.

    Returns:
        grep's standard output with surrounding whitespace stripped; an
        empty string when grep prints nothing to stdout.
    """
    pipe = os.popen("grep -E {} {}".format(pattern, filename))
    try:
        return pipe.read().strip()
    finally:
        # Close explicitly so the child process is reaped right away instead
        # of whenever the pipe object happens to be collected.
        pipe.close()
def check_running(commandname):
    """Return the ``ps ax`` lines that match *commandname*.

    The name is used as a ``grep -E`` pattern and is interpolated into a
    shell pipeline unquoted; only pass trusted values. Lines containing
    "grep" are filtered out so the pipeline does not report itself.

    Args:
        commandname: extended regular expression matched against ``ps ax``
            output.

    Returns:
        The matching lines with surrounding whitespace stripped; an empty
        string when nothing matches.
    """
    pipe = os.popen("ps ax | grep -E {} | grep -v grep".format(commandname))
    try:
        return pipe.read().strip()
    finally:
        # Close explicitly so the child processes are reaped right away.
        pipe.close()
111 f = open(RLA_PID_FILE, 'w')
112 f.write("{}\n".format(pid))
115 print("Uuuhhh.... this should not occur.")
122 # Keep trying to authenticate session, waiting for NM to re-write the
123 # session file, or DNS to succeed, until AuthCheck succeeds.
126 f = open(SESSION_FILE, 'r')
127 session_str = f.read().strip()
128 api = PLC(Auth(session=session_str), api_server_url)
129 # NOTE: What should we do if this call fails?
130 # TODO: handle dns failure here.
134 print("Retry in 30 seconds: ", os.popen("uptime").read().strip())
135 traceback.print_exc(limit=5)
140 if len(sys.argv) > 2:
143 traceback.print_exc()
147 # NOTE: here we are inferring the runlevel by environmental
148 # observations. We know how this process was started by the
149 # given command line argument. Then in bootmanager
150 # runlevel, the bm.log gives information about the current
153 # call plc for current boot state?
154 # how long have we been running?
155 if env == "bootmanager":
156 bs_val = extract_from('/tmp/bm.log', "'Current boot state:'")
157 if len(bs_val) > 0: bs_val = bs_val.split()[-1]
158 ex_val = extract_from('/tmp/bm.log', 'Exception')
159 fs_val = extract_from('/tmp/bm.log', 'mke2fs')
160 bm_val = check_running("BootManager.py")
162 if bs_val in ['diag', 'diagnose', 'safeboot', 'disabled', 'disable']:
163 api.ReportRunlevel({'run_level' : 'safeboot'})
165 elif len(ex_val) > len("Exception"):
166 api.ReportRunlevel({'run_level' : 'failboot'})
168 elif len(fs_val) > 0 and len(bm_val) > 0:
169 api.ReportRunlevel({'run_level' : 'reinstall'})
172 api.ReportRunlevel({'run_level' : 'failboot'})
174 elif env == "production":
175 api.ReportRunlevel({'run_level' : 'boot'})
177 api.ReportRunlevel({'run_level' : 'failboot'})
180 print("reporting error: ", os.popen("uptime").read().strip())
181 traceback.print_exc()
184 # TODO: change to a configurable value
189 os.stat(RLA_PID_FILE)
190 f = os.popen("ps ax | grep RunlevelAgent | grep -Ev 'grep|vim' | awk '{print $1}' | wc -l")
196 os.unlink(RLA_PID_FILE)
207 pid = open(RLA_PID_FILE, 'r').read().strip()
209 # Try three different ways to kill the process. Just to be sure.
210 os.kill(int(pid), signal.SIGKILL)
211 os.system("pkill RunlevelAgent.py")
212 os.system("ps ax | grep RunlevelAgent | grep -v grep | awk '{print $1}' | xargs kill -9 ")
214 if __name__ == "__main__":
215 if "start" in sys.argv and not agent_running():
218 if "stop" in sys.argv and agent_running():