X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=source%2FBootManager.py;fp=source%2FBootManager.py;h=3c3f1d4a8557b07af0f86f90d4be01fe661ca120;hb=7cd82eb93512460e8dd748f9737648aabf05f70d;hp=0000000000000000000000000000000000000000;hpb=0ead051299a3094173442795ee262b0900a71ecd;p=bootmanager.git diff --git a/source/BootManager.py b/source/BootManager.py new file mode 100755 index 0000000..3c3f1d4 --- /dev/null +++ b/source/BootManager.py @@ -0,0 +1,416 @@ +#!/usr/bin/python -u +# +# Copyright (c) 2003 Intel Corporation +# All rights reserved. +# +# Copyright (c) 2004-2006 The Trustees of Princeton University +# All rights reserved. + +import string +import sys, os, traceback +import time +import gzip + +from steps import * +from Exceptions import * +import notify_messages +import BootServerRequest +import utils + +# all output is written to this file +BM_NODE_LOG= "/tmp/bm.log" +VARS_FILE = "configuration" + +# the new contents of PATH when the boot manager is running +BIN_PATH= ('/usr/local/bin', + '/usr/local/sbin', + '/usr/bin', + '/usr/sbin', + '/bin', + '/sbin') + +def read_configuration_file(filename): + # read in and store all variables in VARS_FILE into each line + # is in the format name=val (any whitespace around the = is + # removed. everything after the = to the end of the line is + # the value + vars = {} + vars_file= file(filename,'r') + validConfFile = True + for line in vars_file: + # if its a comment or a whitespace line, ignore + if line[:1] == "#" or string.strip(line) == "": + continue + + parts= string.split(line,"=") + if len(parts) != 2: + validConfFile = False + raise Exception( "Invalid line in vars file: %s" % line ) + + name= string.strip(parts[0]) + value= string.strip(parts[1]) + value= value.replace("'", "") # remove quotes + value= value.replace('"', "") # remove quotes + vars[name]= value + + vars_file.close() + if not validConfFile: + raise Exception( "Unable to read configuration vars." ) + + # find out which directory we are running it, and set a variable + # for that. future steps may need to get files out of the bootmanager + # directory + current_dir= os.getcwd() + vars['BM_SOURCE_DIR']= current_dir + + return vars + +############################## +class log: + + format="%H:%M:%S(%Z) " + + def __init__( self, OutputFilePath= None ): + try: + self.OutputFile= open( OutputFilePath, "w") + self.OutputFilePath= OutputFilePath + except: + print( "bootmanager log : Unable to open output file %r, continuing"%OutputFilePath ) + self.OutputFile= None + + self.VARS = None + try: + vars = read_configuration_file(VARS_FILE) + self.VARS = vars + except Exception, e: + self.LogEntry( str(e) ) + return + + def LogEntry( self, str, inc_newline= 1, display_screen= 1 ): + now=time.strftime(log.format, time.localtime()) + if self.OutputFile: + self.OutputFile.write( now+str ) + if display_screen: + sys.stdout.write( now+str ) + + if inc_newline: + if display_screen: + sys.stdout.write( "\n" ) + if self.OutputFile: + self.OutputFile.write( "\n" ) + + if self.OutputFile: + self.OutputFile.flush() + + def write( self, str ): + """ + make log behave like a writable file object (for traceback + prints) + """ + self.LogEntry( str, 0, 1 ) + + # bm log uploading is available back again, as of nodeconfig-5.0-2 + def Upload( self, extra_file=None ): + """ + upload the contents of the log to the server + """ + if self.OutputFile is not None: + self.OutputFile.flush() + + self.LogEntry( "Uploading logs to %s" % self.VARS['UPLOAD_LOG_SCRIPT'] ) + + self.OutputFile.close() + self.OutputFile= None + + hostname= self.VARS['INTERFACE_SETTINGS']['hostname'] + "." + \ + self.VARS['INTERFACE_SETTINGS']['domainname'] + bs_request = BootServerRequest.BootServerRequest(self.VARS) + try: + # this was working until f10 + bs_request.MakeRequest(PartialPath = self.VARS['UPLOAD_LOG_SCRIPT'], + GetVars = None, PostVars = None, + DoSSL = True, DoCertCheck = True, + FormData = ["log=@" + self.OutputFilePath, + "hostname=" + hostname, + "type=bm.log"]) + except: + # new pycurl + import pycurl + bs_request.MakeRequest(PartialPath = self.VARS['UPLOAD_LOG_SCRIPT'], + GetVars = None, PostVars = None, + DoSSL = True, DoCertCheck = True, + FormData = [('log',(pycurl.FORM_FILE, self.OutputFilePath)), + ("hostname",hostname), + ("type","bm.log")]) + if extra_file is not None: + # NOTE: for code-reuse, evoke the bash function 'upload_logs'; + # by adding --login, bash reads .bash_profile before execution. + # Also, never fail, since this is an optional feature. + utils.sysexec_noerr( """bash --login -c "upload_logs %s" """ % extra_file, self) + + +############################## +class BootManager: + + # file containing initial variables/constants + + # the set of valid node run states + NodeRunStates = {'reinstall':None, + 'boot':None, + 'safeboot':None, + 'disabled':None, + } + + def __init__(self, log, forceState): + # override machine's current state from the command line + self.forceState = forceState + + # the main logging point + self.LOG= log + + # set to 1 if we can run after initialization + self.CAN_RUN = 0 + + if log.VARS: + # this contains a set of information used and updated by each step + self.VARS= log.VARS + else: + return + + # not sure what the current PATH is set to, replace it with what + # we know will work with all the boot cds + os.environ['PATH']= string.join(BIN_PATH,":") + + self.CAN_RUN= 1 + + def Run(self): + """ + core boot manager logic. + + the way errors are handled is as such: if any particular step + cannot continue or unexpectibly fails, an exception is thrown. + in this case, the boot manager cannot continue running. + + these step functions can also return a 0/1 depending on whether + or not it succeeded. In the case of steps like ConfirmInstallWithUser, + a 0 is returned and no exception is thrown if the user chose not + to confirm the install. The same goes with the CheckHardwareRequirements. + If requriements not met, but tests were succesfull, return 0. + + for steps that run within the installer, they are expected to either + complete succesfully and return 1, or throw an execption. + + For exact return values and expected operations, see the comments + at the top of each of the invididual step functions. + """ + + def _nodeNotInstalled(message='MSG_NODE_NOT_INSTALLED'): + # called by the _xxxState() functions below upon failure + self.VARS['RUN_LEVEL']= 'failboot' + notify = getattr(notify_messages, message) + self.VARS['STATE_CHANGE_NOTIFY']= 1 + self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= notify + raise BootManagerException, notify + + def _bootRun(): + # implements the boot logic, which consists of first + # double checking that the node was properly installed, + # checking whether someone added or changed disks, and + # then finally chain boots. + + # starting the fallback/debug ssh daemon for safety: + # if the node install somehow hangs, or if it simply takes ages, + # we can still enter and investigate + try: + StartDebug.Run(self.VARS, self.LOG, last_resort = False) + except: + pass + + InstallInit.Run( self.VARS, self.LOG ) + ret = ValidateNodeInstall.Run( self.VARS, self.LOG ) + if ret == 1: + WriteModprobeConfig.Run( self.VARS, self.LOG ) + WriteNetworkConfig.Run( self.VARS, self.LOG ) + CheckForNewDisks.Run( self.VARS, self.LOG ) + SendHardwareConfigToPLC.Run( self.VARS, self.LOG ) + ChainBootNode.Run( self.VARS, self.LOG ) + elif ret == -1: + _nodeNotInstalled('MSG_NODE_FILESYSTEM_CORRUPT') + elif ret == -2: + _nodeNotInstalled('MSG_NODE_MOUNT_FAILED') + elif ret == -3: + _nodeNotInstalled('MSG_NODE_MISSING_KERNEL') + else: + _nodeNotInstalled() + + def _reinstallRun(): + + # starting the fallback/debug ssh daemon for safety: + # if the node install somehow hangs, or if it simply takes ages, + # we can still enter and investigate + try: + StartDebug.Run(self.VARS, self.LOG, last_resort = False) + except: + pass + + # implements the reinstall logic, which will check whether + # the min. hardware requirements are met, install the + # software, and upon correct installation will switch too + # 'boot' state and chainboot into the production system + if not CheckHardwareRequirements.Run( self.VARS, self.LOG ): + self.VARS['RUN_LEVEL']= 'failboot' + raise BootManagerException, "Hardware requirements not met." + + # runinstaller + InstallInit.Run( self.VARS, self.LOG ) + InstallPartitionDisks.Run( self.VARS, self.LOG ) + InstallBootstrapFS.Run( self.VARS, self.LOG ) + InstallWriteConfig.Run( self.VARS, self.LOG ) + InstallUninitHardware.Run( self.VARS, self.LOG ) + self.VARS['BOOT_STATE']= 'boot' + self.VARS['STATE_CHANGE_NOTIFY']= 1 + self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \ + notify_messages.MSG_INSTALL_FINISHED + UpdateBootStateWithPLC.Run( self.VARS, self.LOG ) + _bootRun() + + def _installRun(): + # implements the new install logic, which will first check + # with the user whether it is ok to install on this + # machine, switch to 'reinstall' state and then invoke the reinstall + # logic. See reinstallState logic comments for further + # details. + if not ConfirmInstallWithUser.Run( self.VARS, self.LOG ): + return 0 + self.VARS['BOOT_STATE']= 'reinstall' + _reinstallRun() + + def _debugRun(state='failboot'): + # implements debug logic, which starts the sshd and just waits around + self.VARS['RUN_LEVEL']=state + StartDebug.Run( self.VARS, self.LOG ) + # fsck/mount fs if present, and ignore return value if it's not. + ValidateNodeInstall.Run( self.VARS, self.LOG ) + + def _badstateRun(): + # should never happen; log event + self.LOG.write( "\nInvalid BOOT_STATE = %s\n" % self.VARS['BOOT_STATE']) + _debugRun() + + # setup state -> function hash table + BootManager.NodeRunStates['reinstall'] = _reinstallRun + BootManager.NodeRunStates['boot'] = _bootRun + BootManager.NodeRunStates['safeboot'] = lambda : _debugRun('safeboot') + BootManager.NodeRunStates['disabled'] = lambda : _debugRun('disabled') + + success = 0 + try: + InitializeBootManager.Run( self.VARS, self.LOG ) + ReadNodeConfiguration.Run( self.VARS, self.LOG ) + AuthenticateWithPLC.Run( self.VARS, self.LOG ) + UpdateLastBootOnce.Run( self.VARS, self.LOG ) + StartRunlevelAgent.Run( self.VARS, self.LOG ) + GetAndUpdateNodeDetails.Run( self.VARS, self.LOG ) + + # override machine's current state from the command line + if self.forceState is not None: + self.VARS['BOOT_STATE']= self.forceState + UpdateBootStateWithPLC.Run( self.VARS, self.LOG ) + + stateRun = BootManager.NodeRunStates.get(self.VARS['BOOT_STATE'],_badstateRun) + stateRun() + success = 1 + + except KeyError, e: + self.LOG.write( "\n\nKeyError while running: %s\n" % str(e) ) + except BootManagerException, e: + self.LOG.write( "\n\nException while running: %s\n" % str(e) ) + except BootManagerAuthenticationException, e: + self.LOG.write( "\n\nFailed to Authenticate Node: %s\n" % str(e) ) + # sets /tmp/CANCEL_BOOT flag + StartDebug.Run(self.VARS, self.LOG ) + # Return immediately b/c any other calls to API will fail + return success + except: + self.LOG.write( "\n\nImplementation Error\n") + traceback.print_exc(file=self.LOG.OutputFile) + traceback.print_exc() + + if not success: + try: + _debugRun() + except BootManagerException, e: + self.LOG.write( "\n\nException while running: %s\n" % str(e) ) + except: + self.LOG.write( "\n\nImplementation Error\n") + traceback.print_exc(file=self.LOG.OutputFile) + traceback.print_exc() + + return success + + +def main(argv): + + import utils + utils.prompt_for_breakpoint_mode() + +# utils.breakpoint ("Entering BootManager::main") + + # set to 1 if error occurred + error= 0 + + # all output goes through this class so we can save it and post + # the data back to PlanetLab central + LOG= log( BM_NODE_LOG ) + + # NOTE: assume CWD is BM's source directory, but never fail + utils.sysexec_noerr("./setup_bash_history_scripts.sh", LOG) + + LOG.LogEntry( "BootManager started at: %s" % \ + time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) ) + + try: + forceState = None + if len(argv) == 2: + fState = argv[1] + if BootManager.NodeRunStates.has_key(fState): + forceState = fState + else: + LOG.LogEntry("FATAL: cannot force node run state to=%s" % fState) + error = 1 + except: + traceback.print_exc(file=LOG.OutputFile) + traceback.print_exc() + + if error: + LOG.LogEntry( "BootManager finished at: %s" % \ + time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) ) + LOG.Upload() + return error + + try: + bm= BootManager(LOG,forceState) + if bm.CAN_RUN == 0: + LOG.LogEntry( "Unable to initialize BootManager." ) + else: + LOG.LogEntry( "Running version %s of BootManager." % bm.VARS['VERSION'] ) + success= bm.Run() + if success: + LOG.LogEntry( "\nDone!" ); + else: + LOG.LogEntry( "\nError occurred!" ); + error = 1 + except: + traceback.print_exc(file=LOG.OutputFile) + traceback.print_exc() + + LOG.LogEntry( "BootManager finished at: %s" % \ + time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) ) + LOG.Upload() + + return error + + +if __name__ == "__main__": + error = main(sys.argv) + sys.exit(error)