3 # Copyright (c) 2003 Intel Corporation
6 # Copyright (c) 2004-2006 The Trustees of Princeton University
10 import sys, os, traceback
15 from Exceptions import *
16 import notify_messages
17 import BootServerRequest
19 # all output is written to this file
20 BM_NODE_LOG= "/tmp/bm.log"
21 VARS_FILE = "configuration"
23 # the new contents of PATH when the boot manager is running
24 BIN_PATH= ('/usr/local/bin',
31 def read_configuration_file(filename):
32 # read in and store all variables in VARS_FILE. each line
33 # is in the format name=val (any whitespace around the = is
34 # removed. everything after the = to the end of the line is
37 vars_file= file(filename,'r')
39 for line in vars_file:
40 # if it's a comment or a whitespace line, ignore
41 if line[:1] == "#" or string.strip(line) == "":
44 parts= string.split(line,"=")
47 raise Exception( "Invalid line in vars file: %s" % line )
49 name= string.strip(parts[0])
50 value= string.strip(parts[1])
51 value= value.replace("'", "") # remove quotes
52 value= value.replace('"', "") # remove quotes
57 raise Exception( "Unable to read configuration vars." )
59 # find out which directory we are running in, and set a variable
60 # for that. future steps may need to get files out of the bootmanager
62 current_dir= os.getcwd()
63 vars['BM_SOURCE_DIR']= current_dir
67 ##############################
70 format="%H:%M:%S(%Z) "
72 def __init__( self, OutputFilePath= None ):
74 self.OutputFile= open( OutputFilePath, "w")
75 self.OutputFilePath= OutputFilePath
77 print( "bootmanager log : Unable to open output file %r, continuing"%OutputFilePath )
82 vars = read_configuration_file(VARS_FILE)
85 self.LogEntry( str(e) )
88 def LogEntry( self, str, inc_newline= 1, display_screen= 1 ):
89 now=time.strftime(log.format, time.localtime())
91 self.OutputFile.write( now+str )
93 sys.stdout.write( now+str )
97 sys.stdout.write( "\n" )
99 self.OutputFile.write( "\n" )
102 self.OutputFile.flush()
104 def write( self, str ):
106 make log behave like a writable file object (for traceback
109 self.LogEntry( str, 0, 1 )
111 # bm log uploading is available again, as of nodeconfig-5.0-2
114 upload the contents of the log to the server
116 if self.OutputFile is not None:
117 self.OutputFile.flush()
119 self.LogEntry( "Uploading logs to %s" % self.VARS['UPLOAD_LOG_SCRIPT'] )
121 self.OutputFile.close()
122 self.OutputFile= None
124 hostname= self.VARS['INTERFACE_SETTINGS']['hostname'] + "." + \
125 self.VARS['INTERFACE_SETTINGS']['domainname']
126 bs_request = BootServerRequest.BootServerRequest(self.VARS)
127 bs_request.MakeRequest(PartialPath = self.VARS['UPLOAD_LOG_SCRIPT'],
128 GetVars = None, PostVars = None,
129 FormData = ["log=@" + self.OutputFilePath,
130 "hostname=" + hostname, "type=bm.log"],
131 DoSSL = True, DoCertCheck = True)
133 ##############################
136 # file containing initial variables/constants
138 # the set of valid node run states
139 NodeRunStates = {'reinstall':None,
145 def __init__(self, log, forceState):
146 # override machine's current state from the command line
147 self.forceState = forceState
149 # the main logging point
152 # set to 1 if we can run after initialization
156 # this contains a set of information used and updated by each step
161 # not sure what the current PATH is set to, replace it with what
162 # we know will work with all the boot cds
163 os.environ['PATH']= string.join(BIN_PATH,":")
169 core boot manager logic.
171 the way errors are handled is as such: if any particular step
172 cannot continue or unexpectedly fails, an exception is thrown.
173 in this case, the boot manager cannot continue running.
175 these step functions can also return a 0/1 depending on whether
176 or not it succeeded. In the case of steps like ConfirmInstallWithUser,
177 a 0 is returned and no exception is thrown if the user chose not
178 to confirm the install. The same goes with the CheckHardwareRequirements.
179 If requirements are not met, but tests were successful, return 0.
181 for steps that run within the installer, they are expected to either
182 complete successfully and return 1, or throw an exception.
184 For exact return values and expected operations, see the comments
185 at the top of each of the individual step functions.
188 def _nodeNotInstalled(message='MSG_NODE_NOT_INSTALLED'):
189 # called by the state-handler functions below upon failure
# (NOTE(review): the handlers visible in this file are named _xxxRun,
# not _xxxState -- confirm against the full file).
#
# message: name of an attribute of the notify_messages module; the
#          attribute's value is used as the notification text.
# This helper never returns normally: it always raises
# BootManagerException carrying that text. getattr() raises
# AttributeError first if notify_messages has no such attribute.
# `self` is not a parameter; it is taken from the enclosing scope,
# which is not visible in this excerpt.
#
# record that the node ends up in the 'failboot' run level
190 self.VARS['RUN_LEVEL']= 'failboot'
# resolve the message name to the actual notification text
191 notify = getattr(notify_messages, message)
# flag a state-change notification and attach the text to it
192 self.VARS['STATE_CHANGE_NOTIFY']= 1
193 self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= notify
# Python 2 raise form: raises BootManagerException with notify as its value
194 raise BootManagerException, notify
197 # implements the boot logic, which consists of first
198 # double checking that the node was properly installed,
199 # checking whether someone added or changed disks, and
200 # then finally chain boots.
202 # starting the fallback/debug ssh daemon for safety:
203 # if the node install somehow hangs, or if it simply takes ages,
204 # we can still enter and investigate
206 StartDebug.Run(self.VARS, self.LOG, last_resort = False)
210 InstallInit.Run( self.VARS, self.LOG )
211 ret = ValidateNodeInstall.Run( self.VARS, self.LOG )
213 WriteModprobeConfig.Run( self.VARS, self.LOG )
214 MakeInitrd.Run( self.VARS, self.LOG )
215 WriteNetworkConfig.Run( self.VARS, self.LOG )
216 CheckForNewDisks.Run( self.VARS, self.LOG )
217 SendHardwareConfigToPLC.Run( self.VARS, self.LOG )
218 ChainBootNode.Run( self.VARS, self.LOG )
220 _nodeNotInstalled('MSG_NODE_FILESYSTEM_CORRUPT')
222 _nodeNotInstalled('MSG_NODE_MOUNT_FAILED')
224 _nodeNotInstalled('MSG_NODE_MISSING_KERNEL')
230 # starting the fallback/debug ssh daemon for safety:
231 # if the node install somehow hangs, or if it simply takes ages,
232 # we can still enter and investigate
234 StartDebug.Run(self.VARS, self.LOG, last_resort = False)
238 # implements the reinstall logic, which will check whether
239 # the min. hardware requirements are met, install the
240 # software, and upon correct installation will switch to
241 # 'boot' state and chainboot into the production system
242 if not CheckHardwareRequirements.Run( self.VARS, self.LOG ):
243 self.VARS['RUN_LEVEL']= 'failboot'
244 raise BootManagerException, "Hardware requirements not met."
247 InstallInit.Run( self.VARS, self.LOG )
248 InstallPartitionDisks.Run( self.VARS, self.LOG )
249 InstallBootstrapFS.Run( self.VARS, self.LOG )
250 InstallWriteConfig.Run( self.VARS, self.LOG )
251 InstallUninitHardware.Run( self.VARS, self.LOG )
252 self.VARS['BOOT_STATE']= 'boot'
253 self.VARS['STATE_CHANGE_NOTIFY']= 1
254 self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
255 notify_messages.MSG_INSTALL_FINISHED
256 UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
260 # implements the new install logic, which will first check
261 # with the user whether it is ok to install on this
262 # machine, switch to 'reinstall' state and then invoke the reinstall
263 # logic. See reinstallState logic comments for further
265 if not ConfirmInstallWithUser.Run( self.VARS, self.LOG ):
267 self.VARS['BOOT_STATE']= 'reinstall'
268 UpdateRunLevelWithPLC.Run( self.VARS, self.LOG )
271 def _debugRun(state='failboot'):
272 # implements debug logic, which starts the sshd and just waits around
#
# state: value stored in VARS['RUN_LEVEL'] before the run level is
#        pushed through UpdateRunLevelWithPLC; defaults to 'failboot'.
#        The 'safeboot' and 'disabled' entries of NodeRunStates call
#        this helper with their own state name.
# `self` is taken from the enclosing scope, which is not visible in
# this excerpt.
#
# set the run level first so the update step below sees the new value
273 self.VARS['RUN_LEVEL']=state
274 UpdateRunLevelWithPLC.Run( self.VARS, self.LOG )
# called without last_resort here, unlike the StartDebug calls made
# elsewhere in this file with last_resort = False
275 StartDebug.Run( self.VARS, self.LOG )
276 # fsck/mount fs if present, and ignore return value if it's not.
277 ValidateNodeInstall.Run( self.VARS, self.LOG )
280 # should never happen; log event
281 self.LOG.write( "\nInvalid BOOT_STATE = %s\n" % self.VARS['BOOT_STATE'])
284 # setup state -> function hash table
285 BootManager.NodeRunStates['reinstall'] = _reinstallRun
286 BootManager.NodeRunStates['boot'] = _bootRun
287 BootManager.NodeRunStates['safeboot'] = lambda : _debugRun('safeboot')
288 BootManager.NodeRunStates['disabled'] = lambda : _debugRun('disabled')
292 InitializeBootManager.Run( self.VARS, self.LOG )
293 ReadNodeConfiguration.Run( self.VARS, self.LOG )
294 AuthenticateWithPLC.Run( self.VARS, self.LOG )
295 StartRunlevelAgent.Run( self.VARS, self.LOG )
296 GetAndUpdateNodeDetails.Run( self.VARS, self.LOG )
298 # override machine's current state from the command line
299 if self.forceState is not None:
300 self.VARS['BOOT_STATE']= self.forceState
301 UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
302 UpdateRunLevelWithPLC.Run( self.VARS, self.LOG )
304 stateRun = BootManager.NodeRunStates.get(self.VARS['BOOT_STATE'],_badstateRun)
309 self.LOG.write( "\n\nKeyError while running: %s\n" % str(e) )
310 except BootManagerException, e:
311 self.LOG.write( "\n\nException while running: %s\n" % str(e) )
312 except BootManagerAuthenticationException, e:
313 self.LOG.write( "\n\nFailed to Authenticate Node: %s\n" % str(e) )
314 # sets /tmp/CANCEL_BOOT flag
315 StartDebug.Run(self.VARS, self.LOG )
316 # Return immediately b/c any other calls to API will fail
319 self.LOG.write( "\n\nImplementation Error\n")
320 traceback.print_exc(file=self.LOG.OutputFile)
321 traceback.print_exc()
326 except BootManagerException, e:
327 self.LOG.write( "\n\nException while running: %s\n" % str(e) )
329 self.LOG.write( "\n\nImplementation Error\n")
330 traceback.print_exc(file=self.LOG.OutputFile)
331 traceback.print_exc()
339 utils.prompt_for_breakpoint_mode()
341 utils.breakpoint ("Entering BootManager::main")
343 # set to 1 if error occurred
346 # all output goes through this class so we can save it and post
347 # the data back to PlanetLab central
348 LOG= log( BM_NODE_LOG )
350 LOG.LogEntry( "BootManager started at: %s" % \
351 time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )
357 if BootManager.NodeRunStates.has_key(fState):
360 LOG.LogEntry("FATAL: cannot force node run state to=%s" % fState)
363 traceback.print_exc(file=LOG.OutputFile)
364 traceback.print_exc()
367 LOG.LogEntry( "BootManager finished at: %s" % \
368 time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )
373 bm= BootManager(LOG,forceState)
375 LOG.LogEntry( "Unable to initialize BootManager." )
377 LOG.LogEntry( "Running version %s of BootManager." % bm.VARS['VERSION'] )
380 LOG.LogEntry( "\nDone!" );
382 LOG.LogEntry( "\nError occurred!" );
385 traceback.print_exc(file=LOG.OutputFile)
386 traceback.print_exc()
388 LOG.LogEntry( "BootManager finished at: %s" % \
389 time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )
395 if __name__ == "__main__":
396 error = main(sys.argv)