3 # Copyright (c) 2003 Intel Corporation
6 # Copyright (c) 2004-2006 The Trustees of Princeton University
16 from Exceptions import *
17 import notify_messages
18 import BootServerRequest
21 # all output is written to this file
22 BM_NODE_LOG = "/tmp/bm.log"
23 VARS_FILE = "configuration"
25 # the new contents of PATH when the boot manager is running
26 BIN_PATH= ('/usr/local/bin',
33 def read_configuration_file(filename):
34 # read in and store all variables in VARS_FILE. each line
35 # is in the format name=val (any whitespace around the = is
36 # removed); everything after the = to the end of the line is
39 vars_file = file(filename,'r')
41 for line in vars_file:
42 # if it's a comment or a whitespace-only line, ignore
43 if line[:1] == "#" or string.strip(line) == "":
46 parts = string.split(line,"=")
49 raise Exception("Invalid line in vars file: {}".format(line))
51 name = string.strip(parts[0])
52 value = string.strip(parts[1])
53 value = value.replace("'", "") # remove quotes
54 value = value.replace('"', "") # remove quotes
59 raise Exception("Unable to read configuration vars.")
61 # find out which directory we are running in, and set a variable
62 # for that. future steps may need to get files out of the bootmanager
64 current_dir = os.getcwd()
65 vars['BM_SOURCE_DIR'] = current_dir
69 ##############################
72 format = "%H:%M:%S(%Z) "
74 def __init__(self, OutputFilePath=None):
76 self.OutputFile = open(OutputFilePath, "w")
77 self.OutputFilePath = OutputFilePath
79 print("bootmanager log : Unable to open output file {}, continuing"\
80 .format(OutputFilePath))
81 self.OutputFile = None
85 vars = read_configuration_file(VARS_FILE)
91 def LogEntry(self, str, inc_newline = 1, display_screen = 1):
92 now = time.strftime(log.format, time.localtime())
94 self.OutputFile.write(now + str)
96 sys.stdout.write(now + str)
100 sys.stdout.write("\n")
102 self.OutputFile.write("\n")
105 self.OutputFile.flush()
107 def write(self, str):
109 make log behave like a writable file object (for traceback
112 self.LogEntry(str, 0, 1)
114 def print_stack(self):
116 dump current stack in log
118 self.write(traceback.format_exc())
120 # bm log uploading is available again, as of nodeconfig-5.0-2
121 def Upload(self, extra_file=None):
123 upload the contents of the log to the server
125 if self.OutputFile is not None:
126 self.OutputFile.flush()
128 self.LogEntry("Uploading logs to {}".format(self.VARS['UPLOAD_LOG_SCRIPT']))
130 self.OutputFile.close()
131 self.OutputFile= None
133 hostname = self.VARS['INTERFACE_SETTINGS']['hostname'] + "." + \
134 self.VARS['INTERFACE_SETTINGS']['domainname']
135 bs_request = BootServerRequest.BootServerRequest(self.VARS)
137 # this was working until f10
138 bs_request.MakeRequest(PartialPath = self.VARS['UPLOAD_LOG_SCRIPT'],
139 GetVars = None, PostVars = None,
140 DoSSL = True, DoCertCheck = True,
141 FormData = ["log=@" + self.OutputFilePath,
142 "hostname=" + hostname,
147 bs_request.MakeRequest(PartialPath = self.VARS['UPLOAD_LOG_SCRIPT'],
148 GetVars = None, PostVars = None,
149 DoSSL = True, DoCertCheck = True,
150 FormData = [('log',(pycurl.FORM_FILE, self.OutputFilePath)),
151 ("hostname",hostname),
153 if extra_file is not None:
154 # NOTE: for code-reuse, invoke the bash function 'upload_logs';
155 # by adding --login, bash reads .bash_profile before execution.
156 # Also, never fail, since this is an optional feature.
157 utils.sysexec_noerr("""bash --login -c "upload_logs {}" """.format(extra_file), self)
160 ##############################
163 # file containing initial variables/constants
165 # the set of valid node run states
166 NodeRunStates = {'reinstall':None,
172 def __init__(self, log, forceState):
173 # override machine's current state from the command line
174 self.forceState = forceState
176 # the main logging point
179 # set to 1 if we can run after initialization
183 # this contains a set of information used and updated by each step
188 # not sure what the current PATH is set to, replace it with what
189 # we know will work with all the boot cds
190 os.environ['PATH'] = string.join(BIN_PATH,":")
196 core boot manager logic.
198 the way errors are handled is as such: if any particular step
199 cannot continue or unexpectedly fails, an exception is thrown.
200 in this case, the boot manager cannot continue running.
202 these step functions can also return a 0/1 depending on whether
203 or not it succeeded. In the case of steps like ConfirmInstallWithUser,
204 a 0 is returned and no exception is thrown if the user chose not
205 to confirm the install. The same goes with the CheckHardwareRequirements.
206 If requirements are not met, but tests were successful, return 0.
208 for steps that run within the installer, they are expected to either
209 complete successfully and return 1, or throw an exception.
211 For exact return values and expected operations, see the comments
212 at the top of each of the individual step functions.
215 def _nodeNotInstalled(message='MSG_NODE_NOT_INSTALLED'):
216 # called by the _xxxState() functions below upon failure
217 self.VARS['RUN_LEVEL'] = 'failboot'
218 notify = getattr(notify_messages, message)
219 self.VARS['STATE_CHANGE_NOTIFY'] = 1
220 self.VARS['STATE_CHANGE_NOTIFY_MESSAGE'] = notify
221 raise BootManagerException, notify
224 # implements the boot logic, which consists of first
225 # double checking that the node was properly installed,
226 # checking whether someone added or changed disks, and
227 # then finally chain boots.
229 # starting the fallback/debug ssh daemon for safety:
230 # if the node install somehow hangs, or if it simply takes ages,
231 # we can still enter and investigate
233 StartDebug.Run(self.VARS, self.LOG, last_resort = False)
237 InstallInit.Run(self.VARS, self.LOG)
238 ret = ValidateNodeInstall.Run(self.VARS, self.LOG)
240 # Thierry - feb. 2013 turning off WriteModprobeConfig for now on lxc
241 # for one thing this won't work at all with f18, as modules.pcimap
242 # has disappeared (Daniel suggested modules.aliases could be used instead)
243 # and second, in any case it's been years now that modprobe.conf was deprecated
244 # so most likely this code has no actual effect
245 if self.VARS['virt'] == 'vs':
246 WriteModprobeConfig.Run(self.VARS, self.LOG)
247 WriteNetworkConfig.Run(self.VARS, self.LOG)
248 CheckForNewDisks.Run(self.VARS, self.LOG)
249 SendHardwareConfigToPLC.Run(self.VARS, self.LOG)
250 ChainBootNode.Run(self.VARS, self.LOG)
252 _nodeNotInstalled('MSG_NODE_FILESYSTEM_CORRUPT')
254 _nodeNotInstalled('MSG_NODE_MOUNT_FAILED')
256 _nodeNotInstalled('MSG_NODE_MISSING_KERNEL')
262 # starting the fallback/debug ssh daemon for safety:
263 # if the node install somehow hangs, or if it simply takes ages,
264 # we can still enter and investigate
266 StartDebug.Run(self.VARS, self.LOG, last_resort = False)
270 # implements the reinstall logic, which will check whether
271 # the min. hardware requirements are met, install the
272 # software, and upon correct installation will switch to
273 # 'boot' state and chainboot into the production system
274 if not CheckHardwareRequirements.Run(self.VARS, self.LOG):
275 self.VARS['RUN_LEVEL'] = 'failboot'
276 raise BootManagerException, "Hardware requirements not met."
279 InstallPartitionDisks.Run( self.VARS, self.LOG )
280 InstallInit.Run(self.VARS, self.LOG)
281 InstallBootstrapFS.Run(self.VARS, self.LOG)
282 InstallWriteConfig.Run(self.VARS, self.LOG)
283 InstallUninitHardware.Run(self.VARS, self.LOG)
284 self.VARS['BOOT_STATE'] = 'boot'
285 self.VARS['STATE_CHANGE_NOTIFY'] = 1
286 self.VARS['STATE_CHANGE_NOTIFY_MESSAGE'] = \
287 notify_messages.MSG_INSTALL_FINISHED
288 AnsibleHook.Run(self.VARS, self.LOG)
289 UpdateBootStateWithPLC.Run(self.VARS, self.LOG)
293 # implements the new install logic, which will first check
294 # with the user whether it is ok to install on this
295 # machine, switch to 'reinstall' state and then invoke the reinstall
296 # logic. See reinstallState logic comments for further
298 if not ConfirmInstallWithUser.Run(self.VARS, self.LOG):
300 self.VARS['BOOT_STATE'] = 'reinstall'
302 AnsibleHook.Run(self.VARS, self.LOG)
305 def _debugRun(state='failboot'):
306 # implements debug logic, which starts the sshd and just waits around
307 self.VARS['RUN_LEVEL'] = state
308 StartDebug.Run(self.VARS, self.LOG)
309 # fsck/mount fs if present, and ignore return value if it's not.
310 ValidateNodeInstall.Run(self.VARS, self.LOG)
313 # should never happen; log event
314 self.LOG.write("\nInvalid BOOT_STATE = {}\n".format(self.VARS['BOOT_STATE']))
317 # setup state -> function hash table
318 BootManager.NodeRunStates['reinstall'] = _reinstallRun
319 BootManager.NodeRunStates['boot'] = _bootRun
320 BootManager.NodeRunStates['safeboot'] = lambda : _debugRun('safeboot')
321 BootManager.NodeRunStates['disabled'] = lambda : _debugRun('disabled')
325 InitializeBootManager.Run(self.VARS, self.LOG)
326 ReadNodeConfiguration.Run(self.VARS, self.LOG)
327 AuthenticateWithPLC.Run(self.VARS, self.LOG)
328 UpdateLastBootOnce.Run(self.VARS, self.LOG)
329 StartRunlevelAgent.Run(self.VARS, self.LOG)
330 GetAndUpdateNodeDetails.Run(self.VARS, self.LOG)
332 # override machine's current state from the command line
333 if self.forceState is not None:
334 self.VARS['BOOT_STATE'] = self.forceState
335 UpdateBootStateWithPLC.Run(self.VARS, self.LOG)
337 stateRun = BootManager.NodeRunStates.get(self.VARS['BOOT_STATE'],_badstateRun)
341 except KeyError as e:
342 self.LOG.write("\n\nKeyError while running: {}\n".format(e))
343 self.LOG.print_stack ()
344 except BootManagerException as e:
345 self.LOG.write("\n\nException while running: {}\n".format(e))
346 self.LOG.print_stack ()
347 except BootManagerAuthenticationException as e:
348 self.LOG.write("\n\nFailed to Authenticate Node: {}\n".format(e))
349 self.LOG.print_stack ()
350 # sets /tmp/CANCEL_BOOT flag
351 StartDebug.Run(self.VARS, self.LOG)
352 # Return immediately b/c any other calls to API will fail
355 self.LOG.write("\n\nImplementation Error\n")
356 self.LOG.print_stack ()
361 except BootManagerException, e:
362 self.LOG.write("\n\nException while running: {}\n".format(e))
364 self.LOG.write("\n\nImplementation Error\n")
365 traceback.print_exc(file=self.LOG.OutputFile)
366 traceback.print_exc()
374 utils.prompt_for_breakpoint_mode()
376 # utils.breakpoint ("Entering BootManager::main")
378 # set to 1 if error occurred
381 # all output goes through this class so we can save it and post
382 # the data back to PlanetLab central
383 LOG = log(BM_NODE_LOG)
385 # NOTE: assume CWD is BM's source directory, but never fail
386 utils.sysexec_noerr("./setup_bash_history_scripts.sh", LOG)
388 LOG.LogEntry("BootManager started at: {}"\
389 .format(time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())))
395 if BootManager.NodeRunStates.has_key(fState):
398 LOG.LogEntry("FATAL: cannot force node run state to={}".format(fState))
401 traceback.print_exc(file=LOG.OutputFile)
402 traceback.print_exc()
405 LOG.LogEntry("BootManager finished at: {}"\
406 .format(time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())))
411 bm = BootManager(LOG, forceState)
413 LOG.LogEntry("Unable to initialize BootManager.")
415 LOG.LogEntry("Running version {} of BootManager.".format(bm.VARS['VERSION']))
418 LOG.LogEntry("\nDone!");
420 LOG.LogEntry("\nError occurred!");
423 traceback.print_exc(file=LOG.OutputFile)
424 traceback.print_exc()
426 LOG.LogEntry("BootManager finished at: {}"\
427 .format(time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())))
433 if __name__ == "__main__":
434 error = main(sys.argv)