6 # Copyright (c) 2003 Intel Corporation
9 # Copyright (c) 2004-2006 The Trustees of Princeton University
10 # All rights reserved.
13 import sys, os, traceback
18 from Exceptions import *
19 import notify_messages
20 import BootServerRequest
23 # all output is written to this file
24 BM_NODE_LOG= "/tmp/bm.log"
25 VARS_FILE = "configuration"
27 # the new contents of PATH when the boot manager is running
28 BIN_PATH= ('/usr/local/bin',
35 def read_configuration_file(filename):
# Parse a name=value configuration file into the `vars` mapping.
#
# filename: path of the file to read; it is opened read-only with the
#           Python 2 file() builtin.
# Raises Exception for an invalid line and when the variables cannot be
# read; the conditions guarding both raises are not visible in this view.
# NOTE(review): the initialisation of `vars` and the return statement are
# not visible here either. log.__init__ assigns the result of this call,
# so the mapping is presumably returned -- confirm.
# NOTE(review): `vars` shadows the builtin of the same name.
36 # read in and store all variables in VARS_FILE. each line
37 # is in the format name=val (any whitespace around the = is
38 # removed). everything after the = to the end of the line is
41 vars_file= file(filename,'r')
43 for line in vars_file:
44 # if it's a comment or a whitespace-only line, ignore
45 if line[:1] == "#" or string.strip(line) == "":
# split on every "=": a value that itself contains "=" yields more than
# two parts
48 parts= string.split(line,"=")
51 raise Exception( "Invalid line in vars file: %s" % line )
# both name and value are stripped of surrounding whitespace
53 name= string.strip(parts[0])
54 value= string.strip(parts[1])
55 value= value.replace("'", "") # remove single quotes (anywhere in the value)
56 value= value.replace('"', "") # remove double quotes (anywhere in the value)
61 raise Exception( "Unable to read configuration vars." )
63 # find out which directory we are running in, and set a variable
64 # for that. future steps may need to get files out of the bootmanager
# the current working directory is recorded under 'BM_SOURCE_DIR'
66 current_dir= os.getcwd()
67 vars['BM_SOURCE_DIR']= current_dir
71 ##############################
74 format="%H:%M:%S(%Z) "
76 def __init__( self, OutputFilePath= None ):
# Open the log output file and load the boot manager configuration.
#
# OutputFilePath: path of the file that receives log output; it is opened
#                 in "w" mode, so an existing file is truncated.
# NOTE(review): the try/except lines around the statements below are not
# visible in this view; the print() and LogEntry(str(e)) calls are
# presumably the failure handlers for the open() and for the
# configuration read respectively -- confirm.
78 self.OutputFile= open( OutputFilePath, "w")
# the path is kept because Upload() later posts the file by name
79 self.OutputFilePath= OutputFilePath
81 print( "bootmanager log : Unable to open output file %r, continuing"%OutputFilePath )
# configuration is read from VARS_FILE, relative to the current directory.
# NOTE(review): the local name `vars` shadows the builtin; other methods
# read self.VARS, whose assignment is not visible here.
86 vars = read_configuration_file(VARS_FILE)
89 self.LogEntry( str(e) )
92 def LogEntry( self, str, inc_newline= 1, display_screen= 1 ):
# Write one timestamped entry to the log file and to stdout.
#
# str: text of the entry (the parameter name shadows the builtin `str`
#      inside this method).
# inc_newline, display_screen: flags defaulting to 1. The statements that
#      test them are not visible in this view -- presumably they guard the
#      newline writes and the stdout writes respectively (TODO confirm).
# timestamp prefix: class-level log.format applied to the local time
93 now=time.strftime(log.format, time.localtime())
# the same prefixed text goes to the file and to stdout
95 self.OutputFile.write( now+str )
97 sys.stdout.write( now+str )
101 sys.stdout.write( "\n" )
103 self.OutputFile.write( "\n" )
# flush after each entry so buffered text is handed to the file right away
106 self.OutputFile.flush()
def write( self, str ):
    """
    Make log behave like a writable file object, so that it can be
    handed to code that only needs a write() method (the original
    note mentions traceback).

    str: text to record; the name shadows the builtin but is kept
         because it is part of the existing signature.

    The text is forwarded to LogEntry() with inc_newline=0, since
    file-style callers supply their own line endings, and with
    display_screen=1.
    """
    self.LogEntry( str, 0, 1 )
115 # bm log uploading is available back again, as of nodeconfig-5.0-2
116 def Upload( self, extra_file=None ):
# Post the log file to the boot server and optionally upload one more file.
#
# extra_file: when not None, it is passed to the bash function
#             'upload_logs' through a login shell.
# NOTE(review): two MakeRequest() variants appear below, one with
# curl-style "name=value" strings and one with pycurl form tuples. The
# lines that choose between them, and the closing brackets of both calls,
# are not visible in this view.
# NOTE(review): `pycurl` and `utils` are not among the imports visible in
# this view -- confirm they are imported elsewhere in the file.
118 upload the contents of the log to the server
120 if self.OutputFile is not None:
121 self.OutputFile.flush()
123 self.LogEntry( "Uploading logs to %s" % self.VARS['UPLOAD_LOG_SCRIPT'] )
# the file is closed before it is posted by path; OutputFile is reset to
# None afterwards
125 self.OutputFile.close()
126 self.OutputFile= None
# fully qualified node name: hostname + "." + domainname
128 hostname= self.VARS['INTERFACE_SETTINGS']['hostname'] + "." + \
129 self.VARS['INTERFACE_SETTINGS']['domainname']
130 bs_request = BootServerRequest.BootServerRequest(self.VARS)
132 # this was working until f10
133 bs_request.MakeRequest(PartialPath = self.VARS['UPLOAD_LOG_SCRIPT'],
134 GetVars = None, PostVars = None,
135 DoSSL = True, DoCertCheck = True,
136 FormData = ["log=@" + self.OutputFilePath,
137 "hostname=" + hostname,
142 bs_request.MakeRequest(PartialPath = self.VARS['UPLOAD_LOG_SCRIPT'],
143 GetVars = None, PostVars = None,
144 DoSSL = True, DoCertCheck = True,
145 FormData = [('log',(pycurl.FORM_FILE, self.OutputFilePath)),
146 ("hostname",hostname),
148 if extra_file is not None:
149 # NOTE: for code-reuse, invoke the bash function 'upload_logs';
150 # by adding --login, bash reads .bash_profile before execution.
# NOTE(review): extra_file is interpolated unquoted into the shell command
# line, so it must be a trusted path without shell metacharacters.
151 utils.sysexec( """bash --login -c "upload_logs %s" """ % extra_file, self)
154 ##############################
157 # file containing initial variables/constants
159 # the set of valid node run states
160 NodeRunStates = {'reinstall':None,
166 def __init__(self, log, forceState):
# Set up a boot manager instance.
#
# log:        the logging object. Its assignment is not visible in this
#             view; other methods use self.LOG, presumably set from this
#             argument -- confirm.
# forceState: run state forced from the command line, or None; Run()
#             compares it against None before overriding BOOT_STATE.
# NOTE(review): the statements belonging to the three comments at 170,
# 173 and 177 are missing from this view.
167 # override machine's current state from the command line
168 self.forceState = forceState
170 # the main logging point
173 # set to 1 if we can run after initialization
177 # this contains a set of information used and updated by each step
182 # not sure what the current PATH is set to, replace it with what
183 # we know will work with all the boot cds
# the process-wide PATH is overwritten with the BIN_PATH entries joined by ":"
184 os.environ['PATH']= string.join(BIN_PATH,":")
190 core boot manager logic.
192 the way errors are handled is as such: if any particular step
193 cannot continue or unexpectedly fails, an exception is thrown.
194 in this case, the boot manager cannot continue running.
196 these step functions can also return a 0/1 depending on whether
197 or not it succeeded. In the case of steps like ConfirmInstallWithUser,
198 a 0 is returned and no exception is thrown if the user chose not
199 to confirm the install. The same goes with the CheckHardwareRequirements.
200 If requirements are not met, but the tests were successful, return 0.
202 for steps that run within the installer, they are expected to either
203 complete successfully and return 1, or throw an exception.
205 For exact return values and expected operations, see the comments
206 at the top of each of the individual step functions.
def _nodeNotInstalled(message='MSG_NODE_NOT_INSTALLED'):
    """
    Flag the node as 'failboot', queue a notification and abort the run.

    Called by the _xxxState() functions upon failure. `self` is not a
    parameter: it is taken from the enclosing scope.

    message: name of an attribute of the notify_messages module; the
             attribute's value is used as the notification and as the
             exception argument.

    Raises BootManagerException in every case. Raises AttributeError
    instead when notify_messages has no attribute called `message`;
    note that RUN_LEVEL has already been set to 'failboot' by then.
    """
    self.VARS['RUN_LEVEL'] = 'failboot'
    notify = getattr(notify_messages, message)
    self.VARS['STATE_CHANGE_NOTIFY'] = 1
    self.VARS['STATE_CHANGE_NOTIFY_MESSAGE'] = notify
    # Instance form of raise: accepted by Python 2 and Python 3, whereas
    # the former "raise Class, value" form is Python-2-only syntax.
    # NOTE(review): equivalent as long as notify is not a tuple (the old
    # form would have unpacked a tuple into several arguments).
    raise BootManagerException(notify)
218 # implements the boot logic, which consists of first
219 # double checking that the node was properly installed,
220 # checking whether someone added or changed disks, and
221 # then finally chain boots.
223 # starting the fallback/debug ssh daemon for safety:
224 # if the node install somehow hangs, or if it simply takes ages,
225 # we can still enter and investigate
227 StartDebug.Run(self.VARS, self.LOG, last_resort = False)
231 InstallInit.Run( self.VARS, self.LOG )
232 ret = ValidateNodeInstall.Run( self.VARS, self.LOG )
234 WriteModprobeConfig.Run( self.VARS, self.LOG )
235 WriteNetworkConfig.Run( self.VARS, self.LOG )
236 CheckForNewDisks.Run( self.VARS, self.LOG )
237 SendHardwareConfigToPLC.Run( self.VARS, self.LOG )
238 ChainBootNode.Run( self.VARS, self.LOG )
240 _nodeNotInstalled('MSG_NODE_FILESYSTEM_CORRUPT')
242 _nodeNotInstalled('MSG_NODE_MOUNT_FAILED')
244 _nodeNotInstalled('MSG_NODE_MISSING_KERNEL')
250 # starting the fallback/debug ssh daemon for safety:
251 # if the node install somehow hangs, or if it simply takes ages,
252 # we can still enter and investigate
254 StartDebug.Run(self.VARS, self.LOG, last_resort = False)
258 # implements the reinstall logic, which will check whether
259 # the min. hardware requirements are met, install the
260 # software, and upon correct installation will switch to
261 # 'boot' state and chainboot into the production system
262 if not CheckHardwareRequirements.Run( self.VARS, self.LOG ):
263 self.VARS['RUN_LEVEL']= 'failboot'
264 raise BootManagerException, "Hardware requirements not met."
267 InstallInit.Run( self.VARS, self.LOG )
268 InstallPartitionDisks.Run( self.VARS, self.LOG )
269 InstallBootstrapFS.Run( self.VARS, self.LOG )
270 InstallWriteConfig.Run( self.VARS, self.LOG )
271 InstallUninitHardware.Run( self.VARS, self.LOG )
272 self.VARS['BOOT_STATE']= 'boot'
273 self.VARS['STATE_CHANGE_NOTIFY']= 1
274 self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
275 notify_messages.MSG_INSTALL_FINISHED
276 UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
280 # implements the new install logic, which will first check
281 # with the user whether it is ok to install on this
282 # machine, switch to 'reinstall' state and then invoke the reinstall
283 # logic. See reinstallState logic comments for further
285 if not ConfirmInstallWithUser.Run( self.VARS, self.LOG ):
287 self.VARS['BOOT_STATE']= 'reinstall'
288 UpdateRunLevelWithPLC.Run( self.VARS, self.LOG )
def _debugRun(state='failboot'):
    """
    Debug logic: record the run level, report it, then start the debug
    environment (sshd, per the original note) and just wait around.

    state: value stored under VARS['RUN_LEVEL'] before anything else
           runs; defaults to 'failboot'.

    `self` comes from the enclosing scope.
    """
    boot_vars = self.VARS
    boot_vars['RUN_LEVEL'] = state
    boot_log = self.LOG

    # publish the new run level first, then bring up the debug services
    UpdateRunLevelWithPLC.Run(boot_vars, boot_log)
    StartDebug.Run(boot_vars, boot_log)

    # fsck/mount the filesystem if present; the return value is ignored
    # on purpose, since there may be nothing to mount
    ValidateNodeInstall.Run(boot_vars, boot_log)
300 # should never happen; log event
301 self.LOG.write( "\nInvalid BOOT_STATE = %s\n" % self.VARS['BOOT_STATE'])
304 # setup state -> function hash table
# The entries are written into the class-level BootManager.NodeRunStates
# dict, not into a per-instance copy. The same dict's keys are used
# elsewhere in this file to validate a forced state (has_key check), and
# the table is consulted with _badstateRun as the fallback for unknown
# states.
305 BootManager.NodeRunStates['reinstall'] = _reinstallRun
306 BootManager.NodeRunStates['boot'] = _bootRun
# zero-argument lambdas, so every table entry can be called the same way;
# each forwards to _debugRun with the matching run level
307 BootManager.NodeRunStates['safeboot'] = lambda : _debugRun('safeboot')
308 BootManager.NodeRunStates['disabled'] = lambda : _debugRun('disabled')
312 InitializeBootManager.Run( self.VARS, self.LOG )
313 ReadNodeConfiguration.Run( self.VARS, self.LOG )
314 AuthenticateWithPLC.Run( self.VARS, self.LOG )
315 StartRunlevelAgent.Run( self.VARS, self.LOG )
316 GetAndUpdateNodeDetails.Run( self.VARS, self.LOG )
318 # override machine's current state from the command line
319 if self.forceState is not None:
320 self.VARS['BOOT_STATE']= self.forceState
321 UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
322 UpdateRunLevelWithPLC.Run( self.VARS, self.LOG )
324 stateRun = BootManager.NodeRunStates.get(self.VARS['BOOT_STATE'],_badstateRun)
329 self.LOG.write( "\n\nKeyError while running: %s\n" % str(e) )
330 except BootManagerException, e:
331 self.LOG.write( "\n\nException while running: %s\n" % str(e) )
332 except BootManagerAuthenticationException, e:
333 self.LOG.write( "\n\nFailed to Authenticate Node: %s\n" % str(e) )
334 # sets /tmp/CANCEL_BOOT flag
335 StartDebug.Run(self.VARS, self.LOG )
336 # Return immediately b/c any other calls to API will fail
339 self.LOG.write( "\n\nImplementation Error\n")
340 traceback.print_exc(file=self.LOG.OutputFile)
341 traceback.print_exc()
346 except BootManagerException, e:
347 self.LOG.write( "\n\nException while running: %s\n" % str(e) )
349 self.LOG.write( "\n\nImplementation Error\n")
350 traceback.print_exc(file=self.LOG.OutputFile)
351 traceback.print_exc()
359 utils.prompt_for_breakpoint_mode()
361 utils.breakpoint ("Entering BootManager::main")
363 # set to 1 if error occurred
366 # all output goes through this class so we can save it and post
367 # the data back to PlanetLab central
368 LOG= log( BM_NODE_LOG )
370 # NOTE: assume CWD is BM's source directory, but never fail
371 utils.sysexec("./setup_bash_history_scripts.sh || /bin/true", LOG)
373 LOG.LogEntry( "BootManager started at: %s" % \
374 time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )
380 if BootManager.NodeRunStates.has_key(fState):
383 LOG.LogEntry("FATAL: cannot force node run state to=%s" % fState)
386 traceback.print_exc(file=LOG.OutputFile)
387 traceback.print_exc()
390 LOG.LogEntry( "BootManager finished at: %s" % \
391 time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )
396 bm= BootManager(LOG,forceState)
398 LOG.LogEntry( "Unable to initialize BootManager." )
400 LOG.LogEntry( "Running version %s of BootManager." % bm.VARS['VERSION'] )
403 LOG.LogEntry( "\nDone!" );
405 LOG.LogEntry( "\nError occurred!" );
408 traceback.print_exc(file=LOG.OutputFile)
409 traceback.print_exc()
411 LOG.LogEntry( "BootManager finished at: %s" % \
412 time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )
418 if __name__ == "__main__":
419 error = main(sys.argv)