6 # Copyright (c) 2003 Intel Corporation
9 # Copyright (c) 2004-2006 The Trustees of Princeton University
10 # All rights reserved.
13 import sys, os, traceback
18 from Exceptions import *
19 import notify_messages
20 import BootServerRequest
23 # all output is written to this file
24 BM_NODE_LOG= "/tmp/bm.log"
25 VARS_FILE = "configuration"
27 # the new contents of PATH when the boot manager is running
28 BIN_PATH= ('/usr/local/bin',
35 def read_configuration_file(filename):
36 # read in and store all variables in VARS_FILE. each line
37 # is in the format name=val (any whitespace around the = is
38 # removed). everything after the = to the end of the line is
41 vars_file= file(filename,'r')
43 for line in vars_file:
44 # if it's a comment or a whitespace line, ignore
45 if line[:1] == "#" or string.strip(line) == "":
48 parts= string.split(line,"=")
51 raise Exception( "Invalid line in vars file: %s" % line )
53 name= string.strip(parts[0])
54 value= string.strip(parts[1])
55 value= value.replace("'", "") # remove quotes
56 value= value.replace('"', "") # remove quotes
61 raise Exception( "Unable to read configuration vars." )
63 # find out which directory we are running in, and set a variable
64 # for that. future steps may need to get files out of the bootmanager
66 current_dir= os.getcwd()
67 vars['BM_SOURCE_DIR']= current_dir
71 ##############################
74 format="%H:%M:%S(%Z) "
76 def __init__( self, OutputFilePath= None ):
78 self.OutputFile= open( OutputFilePath, "w")
79 self.OutputFilePath= OutputFilePath
81 print( "bootmanager log : Unable to open output file %r, continuing"%OutputFilePath )
86 vars = read_configuration_file(VARS_FILE)
89 self.LogEntry( str(e) )
92 def LogEntry( self, str, inc_newline= 1, display_screen= 1 ):
93 now=time.strftime(log.format, time.localtime())
95 self.OutputFile.write( now+str )
97 sys.stdout.write( now+str )
101 sys.stdout.write( "\n" )
103 self.OutputFile.write( "\n" )
106 self.OutputFile.flush()
108 def write( self, str ):
110 make log behave like a writable file object (for traceback
113 self.LogEntry( str, 0, 1 )
115 # bm log uploading is available again, as of nodeconfig-5.0-2
116 def Upload( self, extra_file=None ):
118 upload the contents of the log to the server
120 if self.OutputFile is not None:
121 self.OutputFile.flush()
123 self.LogEntry( "Uploading logs to %s" % self.VARS['UPLOAD_LOG_SCRIPT'] )
125 self.OutputFile.close()
126 self.OutputFile= None
128 hostname= self.VARS['INTERFACE_SETTINGS']['hostname'] + "." + \
129 self.VARS['INTERFACE_SETTINGS']['domainname']
130 bs_request = BootServerRequest.BootServerRequest(self.VARS)
132 # this was working until f10
133 bs_request.MakeRequest(PartialPath = self.VARS['UPLOAD_LOG_SCRIPT'],
134 GetVars = None, PostVars = None,
135 DoSSL = True, DoCertCheck = True,
136 FormData = ["log=@" + self.OutputFilePath,
137 "hostname=" + hostname,
142 bs_request.MakeRequest(PartialPath = self.VARS['UPLOAD_LOG_SCRIPT'],
143 GetVars = None, PostVars = None,
144 DoSSL = True, DoCertCheck = True,
145 FormData = [('log',(pycurl.FORM_FILE, self.OutputFilePath)),
146 ("hostname",hostname),
148 if extra_file is not None:
149 # NOTE: for code-reuse, invoke the bash function 'upload_logs';
150 # by adding --login, bash reads .bash_profile before execution.
151 # Also, never fail, since this is an optional feature.
152 utils.sysexec_noerr( """bash --login -c "upload_logs %s" """ % extra_file, self)
155 ##############################
158 # file containing initial variables/constants
160 # the set of valid node run states
161 NodeRunStates = {'reinstall':None,
167 def __init__(self, log, forceState):
168 # override machine's current state from the command line
169 self.forceState = forceState
171 # the main logging point
174 # set to 1 if we can run after initialization
178 # this contains a set of information used and updated by each step
183 # not sure what the current PATH is set to, replace it with what
184 # we know will work with all the boot cds
185 os.environ['PATH']= string.join(BIN_PATH,":")
191 core boot manager logic.
193 the way errors are handled is as such: if any particular step
194 cannot continue or unexpectedly fails, an exception is thrown.
195 in this case, the boot manager cannot continue running.
197 these step functions can also return a 0/1 depending on whether
198 or not it succeeded. In the case of steps like ConfirmInstallWithUser,
199 a 0 is returned and no exception is thrown if the user chose not
200 to confirm the install. The same goes for CheckHardwareRequirements.
201 If the requirements are not met, but the tests were successful, return 0.
203 for steps that run within the installer, they are expected to either
204 complete successfully and return 1, or throw an exception.
206 For exact return values and expected operations, see the comments
207 at the top of each of the individual step functions.
210 def _nodeNotInstalled(message='MSG_NODE_NOT_INSTALLED'):
211 # called by the _xxxState() functions below upon failure
212 self.VARS['RUN_LEVEL']= 'failboot'
213 notify = getattr(notify_messages, message)
214 self.VARS['STATE_CHANGE_NOTIFY']= 1
215 self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= notify
216 raise BootManagerException, notify
219 # implements the boot logic, which consists of first
220 # double checking that the node was properly installed,
221 # checking whether someone added or changed disks, and
222 # then finally chain boots.
224 # starting the fallback/debug ssh daemon for safety:
225 # if the node install somehow hangs, or if it simply takes ages,
226 # we can still enter and investigate
228 StartDebug.Run(self.VARS, self.LOG, last_resort = False)
232 InstallInit.Run( self.VARS, self.LOG )
233 ret = ValidateNodeInstall.Run( self.VARS, self.LOG )
235 WriteModprobeConfig.Run( self.VARS, self.LOG )
236 WriteNetworkConfig.Run( self.VARS, self.LOG )
237 CheckForNewDisks.Run( self.VARS, self.LOG )
238 SendHardwareConfigToPLC.Run( self.VARS, self.LOG )
239 ChainBootNode.Run( self.VARS, self.LOG )
241 _nodeNotInstalled('MSG_NODE_FILESYSTEM_CORRUPT')
243 _nodeNotInstalled('MSG_NODE_MOUNT_FAILED')
245 _nodeNotInstalled('MSG_NODE_MISSING_KERNEL')
251 # starting the fallback/debug ssh daemon for safety:
252 # if the node install somehow hangs, or if it simply takes ages,
253 # we can still enter and investigate
255 StartDebug.Run(self.VARS, self.LOG, last_resort = False)
259 # implements the reinstall logic, which will check whether
260 # the min. hardware requirements are met, install the
261 # software, and upon correct installation will switch to
262 # 'boot' state and chainboot into the production system
263 if not CheckHardwareRequirements.Run( self.VARS, self.LOG ):
264 self.VARS['RUN_LEVEL']= 'failboot'
265 raise BootManagerException, "Hardware requirements not met."
268 InstallInit.Run( self.VARS, self.LOG )
269 InstallPartitionDisks.Run( self.VARS, self.LOG )
270 InstallBootstrapFS.Run( self.VARS, self.LOG )
271 InstallWriteConfig.Run( self.VARS, self.LOG )
272 InstallUninitHardware.Run( self.VARS, self.LOG )
273 self.VARS['BOOT_STATE']= 'boot'
274 self.VARS['STATE_CHANGE_NOTIFY']= 1
275 self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
276 notify_messages.MSG_INSTALL_FINISHED
277 UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
281 # implements the new install logic, which will first check
282 # with the user whether it is ok to install on this
283 # machine, switch to 'reinstall' state and then invoke the reinstall
284 # logic. See reinstallState logic comments for further
286 if not ConfirmInstallWithUser.Run( self.VARS, self.LOG ):
288 self.VARS['BOOT_STATE']= 'reinstall'
289 UpdateRunLevelWithPLC.Run( self.VARS, self.LOG )
292 def _debugRun(state='failboot'):
293 # implements debug logic, which starts the sshd and just waits around
294 self.VARS['RUN_LEVEL']=state
295 UpdateRunLevelWithPLC.Run( self.VARS, self.LOG )
296 StartDebug.Run( self.VARS, self.LOG )
297 # fsck/mount fs if present, and ignore return value if it's not.
298 ValidateNodeInstall.Run( self.VARS, self.LOG )
301 # should never happen; log event
302 self.LOG.write( "\nInvalid BOOT_STATE = %s\n" % self.VARS['BOOT_STATE'])
305 # setup state -> function hash table
306 BootManager.NodeRunStates['reinstall'] = _reinstallRun
307 BootManager.NodeRunStates['boot'] = _bootRun
308 BootManager.NodeRunStates['safeboot'] = lambda : _debugRun('safeboot')
309 BootManager.NodeRunStates['disabled'] = lambda : _debugRun('disabled')
313 InitializeBootManager.Run( self.VARS, self.LOG )
314 ReadNodeConfiguration.Run( self.VARS, self.LOG )
315 AuthenticateWithPLC.Run( self.VARS, self.LOG )
316 StartRunlevelAgent.Run( self.VARS, self.LOG )
317 GetAndUpdateNodeDetails.Run( self.VARS, self.LOG )
319 # override machine's current state from the command line
320 if self.forceState is not None:
321 self.VARS['BOOT_STATE']= self.forceState
322 UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
323 UpdateRunLevelWithPLC.Run( self.VARS, self.LOG )
325 stateRun = BootManager.NodeRunStates.get(self.VARS['BOOT_STATE'],_badstateRun)
330 self.LOG.write( "\n\nKeyError while running: %s\n" % str(e) )
331 except BootManagerException, e:
332 self.LOG.write( "\n\nException while running: %s\n" % str(e) )
333 except BootManagerAuthenticationException, e:
334 self.LOG.write( "\n\nFailed to Authenticate Node: %s\n" % str(e) )
335 # sets /tmp/CANCEL_BOOT flag
336 StartDebug.Run(self.VARS, self.LOG )
337 # Return immediately b/c any other calls to API will fail
340 self.LOG.write( "\n\nImplementation Error\n")
341 traceback.print_exc(file=self.LOG.OutputFile)
342 traceback.print_exc()
347 except BootManagerException, e:
348 self.LOG.write( "\n\nException while running: %s\n" % str(e) )
350 self.LOG.write( "\n\nImplementation Error\n")
351 traceback.print_exc(file=self.LOG.OutputFile)
352 traceback.print_exc()
360 utils.prompt_for_breakpoint_mode()
362 utils.breakpoint ("Entering BootManager::main")
364 # set to 1 if error occurred
367 # all output goes through this class so we can save it and post
368 # the data back to PlanetLab central
369 LOG= log( BM_NODE_LOG )
371 # NOTE: assume CWD is BM's source directory, but never fail
372 utils.sysexec_noerr("./setup_bash_history_scripts.sh", LOG)
374 LOG.LogEntry( "BootManager started at: %s" % \
375 time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )
381 if BootManager.NodeRunStates.has_key(fState):
384 LOG.LogEntry("FATAL: cannot force node run state to=%s" % fState)
387 traceback.print_exc(file=LOG.OutputFile)
388 traceback.print_exc()
391 LOG.LogEntry( "BootManager finished at: %s" % \
392 time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )
397 bm= BootManager(LOG,forceState)
399 LOG.LogEntry( "Unable to initialize BootManager." )
401 LOG.LogEntry( "Running version %s of BootManager." % bm.VARS['VERSION'] )
404 LOG.LogEntry( "\nDone!" );
406 LOG.LogEntry( "\nError occurred!" );
409 traceback.print_exc(file=LOG.OutputFile)
410 traceback.print_exc()
412 LOG.LogEntry( "BootManager finished at: %s" % \
413 time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )
419 if __name__ == "__main__":
420 error = main(sys.argv)