#!/usr/bin/python -u
#
# Copyright (c) 2003 Intel Corporation
# All rights reserved.
#
# Copyright (c) 2004-2006 The Trustees of Princeton University
# All rights reserved.

"""Boot manager entry point.

Reads the local ``configuration`` file, sets up a log that is mirrored to
stdout, runs the sequence of boot steps that matches the node's boot state
and finally uploads the log to the boot server.
"""

import sys
import os
import traceback
import string
import time
import gzip

from steps import *
from Exceptions import *
import notify_messages
import BootServerRequest
import utils

# all output is written to this file
BM_NODE_LOG = "/tmp/bm.log"
# file containing initial variables/constants, relative to the current directory
VARS_FILE = "configuration"

# the new contents of PATH when the boot manager is running
BIN_PATH = ('/usr/local/bin',
            '/usr/local/sbin',
            '/usr/bin',
            '/usr/sbin',
            '/bin',
            '/sbin')


def read_configuration_file(filename):
    """Parse a ``name=value`` configuration file into a dictionary.

    Lines starting with ``#`` and whitespace-only lines are ignored.
    Whitespace around the name and the value is removed, and every single
    or double quote character is stripped from the value.  Everything after
    the first ``=`` belongs to the value, so values may contain ``=``.

    The returned dictionary also carries ``BM_SOURCE_DIR``, set to the
    current working directory, because later steps may need to fetch files
    from the bootmanager directory.

    Raises:
        Exception: if a non-comment, non-blank line contains no ``=``.
        IOError/OSError: if the file cannot be opened.
    """
    config = {}
    # the context manager closes the file even when a bad line raises
    with open(filename, 'r') as vars_file:
        for line in vars_file:
            # if its a comment or a whitespace line, ignore
            if line[:1] == "#" or line.strip() == "":
                continue

            name, separator, value = line.partition("=")
            if not separator:
                raise Exception("Invalid line in vars file: {}".format(line))

            value = value.strip()
            value = value.replace("'", "")   # remove quotes
            value = value.replace('"', "")   # remove quotes
            config[name.strip()] = value

    # find out which directory we are running in, and set a variable
    # for that.
    config['BM_SOURCE_DIR'] = os.getcwd()

    return config


##############################
class log:
    """File-like logger that mirrors every entry to a file and to stdout.

    On construction it also loads the configuration file named by
    ``VARS_FILE`` into ``self.VARS``; ``self.VARS`` stays ``None`` when
    that file cannot be read.
    """

    # strftime pattern of the timestamp prefixed to every entry
    format = "%H:%M:%S(%Z) "

    def __init__(self, OutputFilePath=None):
        """Open *OutputFilePath* for writing and read the configuration.

        A log file that cannot be opened is not fatal: entries are then
        only written to stdout.
        """
        self.OutputFilePath = OutputFilePath
        try:
            self.OutputFile = open(OutputFilePath, "w")
        except Exception:
            # also covers OutputFilePath=None, for which open() raises TypeError
            print("bootmanager log : Unable to open output file {}, continuing"
                  .format(OutputFilePath))
            self.OutputFile = None

        self.VARS = None
        try:
            self.VARS = read_configuration_file(VARS_FILE)
        except Exception as e:
            self.LogEntry(str(e))

    def LogEntry(self, str, inc_newline=1, display_screen=1):
        """Write *str*, prefixed with the local time, to the log.

        Args:
            str: text of the entry.
            inc_newline: when true, terminate the entry with a newline.
            display_screen: when true, also write the entry to stdout.
        """
        entry = time.strftime(log.format, time.localtime()) + str
        if inc_newline:
            entry += "\n"

        if self.OutputFile:
            self.OutputFile.write(entry)
            self.OutputFile.flush()
        if display_screen:
            sys.stdout.write(entry)

    def write(self, str):
        """
        make log behave like a writable file object (for traceback
        prints)
        """
        self.LogEntry(str, 0, 1)

    def print_stack(self):
        """
        dump the exception currently being handled in the log
        """
        self.write(traceback.format_exc())

    # bm log uploading is available back again, as of nodeconfig-5.0-2
    def Upload(self, extra_file=None):
        """Close the log file and upload it to the boot server.

        The upload is skipped, with a log entry saying so, when the values
        it needs (``UPLOAD_LOG_SCRIPT`` and the ``hostname``/``domainname``
        entries of ``INTERFACE_SETTINGS``) are not available, e.g. because
        the configuration file could not be read.

        Args:
            extra_file: optional path handed to the ``upload_logs`` bash
                function after the log itself has been dealt with.
        """
        if self.OutputFile is not None:
            self.OutputFile.flush()

            try:
                upload_script = self.VARS['UPLOAD_LOG_SCRIPT']
                # NOTE(review): INTERFACE_SETTINGS is not produced by
                # read_configuration_file (it only yields strings);
                # presumably one of the boot steps stores it in VARS.
                settings = self.VARS['INTERFACE_SETTINGS']
                hostname = settings['hostname'] + "." + settings['domainname']
            except (KeyError, TypeError):
                # TypeError: self.VARS is None (configuration unreadable)
                upload_script = None
                hostname = None
                self.LogEntry("Not uploading logs: upload settings are not available")
            else:
                self.LogEntry("Uploading logs to {}".format(upload_script))

            # the file must be closed before it is posted
            self.OutputFile.close()
            self.OutputFile = None

            if upload_script is not None:
                self._post_log(upload_script, hostname)

        if extra_file is not None:
            # NOTE: for code-reuse, evoke the bash function 'upload_logs';
            # by adding --login, bash reads .bash_profile before execution.
            # Also, never fail, since this is an optional feature.
            utils.sysexec_noerr("""bash --login -c "upload_logs {}" """.format(extra_file), self)

    def _post_log(self, upload_script, hostname):
        """Post the closed log file to *upload_script* on the boot server."""
        bs_request = BootServerRequest.BootServerRequest(self.VARS)
        try:
            # this was working until f10
            bs_request.MakeRequest(PartialPath=upload_script,
                                   GetVars=None, PostVars=None,
                                   DoSSL=True, DoCertCheck=True,
                                   FormData=["log=@" + self.OutputFilePath,
                                             "hostname=" + hostname,
                                             "type=bm.log"])
        except Exception:
            # new pycurl expects tuples instead of curl-style strings
            import pycurl
            bs_request.MakeRequest(PartialPath=upload_script,
                                   GetVars=None, PostVars=None,
                                   DoSSL=True, DoCertCheck=True,
                                   FormData=[('log', (pycurl.FORM_FILE, self.OutputFilePath)),
                                             ("hostname", hostname),
                                             ("type", "bm.log")])


##############################
class BootManager:
    """Drives the boot steps according to the node's boot state."""

    # the set of valid node run states; Run() fills in the handlers
    NodeRunStates = {'reinstall': None,
                     'upgrade': None,
                     'boot': None,
                     'safeboot': None,
                     'disabled': None,
                     }

    def __init__(self, log, forceState):
        """Prepare a boot manager on top of an already created *log*.

        Args:
            log: the ``log`` instance; its ``VARS`` dictionary is shared
                with (not copied into) this object.
            forceState: boot state that overrides the machine's current
                state, or ``None``.

        ``CAN_RUN`` is left at 0 when the log holds no configuration.
        """
        # override machine's current state from the command line
        self.forceState = forceState

        # the main logging point
        self.LOG = log

        # set to 1 if we can run after initialization
        self.CAN_RUN = 0

        # this contains a set of information used and updated by each step
        self.VARS = log.VARS
        if not self.VARS:
            return

        # not sure what the current PATH is set to, replace it with what
        # we know will work with all the boot cds
        os.environ['PATH'] = ":".join(BIN_PATH)

        self.CAN_RUN = 1

    def Run(self):
        """Run the boot steps that match the node's boot state.

        The common steps run first, then the handler registered for
        ``VARS['BOOT_STATE']`` (or the bad-state handler for an unknown
        state).  Steps signal failure by raising; any failure other than
        an authentication failure is logged and followed by an attempt to
        enter debug mode.

        Returns:
            1 when the selected handler completed without raising,
            0 otherwise.
        """

        def _nodeNotInstalled(message='MSG_NODE_NOT_INSTALLED'):
            # called by the _xxxRun() functions below upon failure
            self.VARS['RUN_LEVEL'] = 'failboot'
            notify = getattr(notify_messages, message)
            self.VARS['STATE_CHANGE_NOTIFY'] = 1
            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE'] = notify
            raise BootManagerException(notify)

        def _startFallbackDebug():
            # starting the fallback/debug ssh daemon for safety:
            # if the node install somehow hangs, or if it simply takes ages,
            # we can still enter and investigate.
            # A failure here must not prevent the node from booting.
            try:
                StartDebug.Run(self.VARS, self.LOG, last_resort=False)
            except Exception:
                self.LOG.LogEntry("Unable to start the fallback debug daemon, continuing")
                self.LOG.print_stack()

        def _bootRun():
            # implements the boot logic, which consists of first
            # double checking that the node was properly installed,
            # checking whether someone added or changed disks, and
            # then finally chain boots.
            _startFallbackDebug()

            InstallInit.Run(self.VARS, self.LOG)
            ret = ValidateNodeInstall.Run(self.VARS, self.LOG)
            if ret == 1:
                # Thierry - feb. 2013 turning off WriteModprobeConfig for now on lxc
                # for one thing this won't work at all with f18, as modules.pcimap
                # has disappeared (Daniel suggested modules.aliases could be used instead)
                # and second, in any case it's been years now that modprobe.conf was
                # deprecated so most likely this code has no actual effect
                if self.VARS['virt'] == 'vs':
                    WriteModprobeConfig.Run(self.VARS, self.LOG)
                WriteNetworkConfig.Run(self.VARS, self.LOG)
                CheckForNewDisks.Run(self.VARS, self.LOG)
                SendHardwareConfigToPLC.Run(self.VARS, self.LOG)
                ChainBootNode.Run(self.VARS, self.LOG)
            elif ret == -1:
                _nodeNotInstalled('MSG_NODE_FILESYSTEM_CORRUPT')
            elif ret == -2:
                _nodeNotInstalled('MSG_NODE_MOUNT_FAILED')
            elif ret == -3:
                _nodeNotInstalled('MSG_NODE_MISSING_KERNEL')
            else:
                _nodeNotInstalled()

        def _reinstallRun(upgrade=False):
            # implements the reinstall logic, which will check whether
            # the min. hardware requirements are met, install the
            # software, and upon correct installation will switch to
            # 'boot' state and chainboot into the production system
            _startFallbackDebug()

            if not CheckHardwareRequirements.Run(self.VARS, self.LOG):
                self.VARS['RUN_LEVEL'] = 'failboot'
                raise BootManagerException("Hardware requirements not met.")

            # runinstaller; an upgrade keeps the existing partitions
            InstallInit.Run(self.VARS, self.LOG)
            if not upgrade:
                InstallPartitionDisks.Run(self.VARS, self.LOG)
            InstallBootstrapFS.Run(self.VARS, self.LOG)
            InstallWriteConfig.Run(self.VARS, self.LOG)
            InstallUninitHardware.Run(self.VARS, self.LOG)
            self.VARS['BOOT_STATE'] = 'boot'
            self.VARS['STATE_CHANGE_NOTIFY'] = 1
            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE'] = \
                notify_messages.MSG_INSTALL_FINISHED
            AnsibleHook.Run(self.VARS, self.LOG)
            UpdateBootStateWithPLC.Run(self.VARS, self.LOG)
            _bootRun()

        # NOTE(review): _installRun is not registered in the state table
        # below, so nothing in this file invokes it - confirm whether an
        # 'install' state is still expected.
        def _installRun():
            # implements the new install logic, which will first check
            # with the user whether it is ok to install on this
            # machine, switch to 'reinstall' state and then invoke the
            # reinstall logic. See the _reinstallRun comments for further
            # details.
            if not ConfirmInstallWithUser.Run(self.VARS, self.LOG):
                return 0
            self.VARS['BOOT_STATE'] = 'reinstall'

            AnsibleHook.Run(self.VARS, self.LOG)
            _reinstallRun()

        def _debugRun(state='failboot'):
            # implements debug logic, which starts the sshd and just waits around
            self.VARS['RUN_LEVEL'] = state
            StartDebug.Run(self.VARS, self.LOG)
            # fsck/mount fs if present, and ignore return value if it's not.
            ValidateNodeInstall.Run(self.VARS, self.LOG)

        def _badstateRun():
            # should never happen; log event
            self.LOG.write("\nInvalid BOOT_STATE = {}\n".format(self.VARS['BOOT_STATE']))
            _debugRun()

        # setup state -> function hash table
        BootManager.NodeRunStates.update({
            'reinstall': lambda: _reinstallRun(upgrade=False),
            'upgrade': lambda: _reinstallRun(upgrade=True),
            'boot': _bootRun,
            'safeboot': lambda: _debugRun('safeboot'),
            'disabled': lambda: _debugRun('disabled'),
        })

        success = 0
        try:
            InitializeBootManager.Run(self.VARS, self.LOG)
            ReadNodeConfiguration.Run(self.VARS, self.LOG)
            AuthenticateWithPLC.Run(self.VARS, self.LOG)
            UpdateLastBootOnce.Run(self.VARS, self.LOG)
            StartRunlevelAgent.Run(self.VARS, self.LOG)
            GetAndUpdateNodeDetails.Run(self.VARS, self.LOG)

            # override machine's current state from the command line
            if self.forceState is not None:
                self.VARS['BOOT_STATE'] = self.forceState
                UpdateBootStateWithPLC.Run(self.VARS, self.LOG)

            stateRun = BootManager.NodeRunStates.get(self.VARS['BOOT_STATE'], _badstateRun)
            stateRun()
            success = 1

        # NOTE(review): the authentication handler comes first so that it
        # stays reachable should BootManagerAuthenticationException derive
        # from BootManagerException; the order is irrelevant otherwise.
        except BootManagerAuthenticationException as e:
            self.LOG.write("\n\nFailed to Authenticate Node: {}\n".format(e))
            self.LOG.print_stack()
            # sets /tmp/CANCEL_BOOT flag
            StartDebug.Run(self.VARS, self.LOG)
            # Return immediately b/c any other calls to API will fail
            return success
        except KeyError as e:
            self.LOG.write("\n\nKeyError while running: {}\n".format(e))
            self.LOG.print_stack()
        except BootManagerException as e:
            self.LOG.write("\n\nException while running: {}\n".format(e))
            self.LOG.print_stack()
        except:
            # deliberately catches everything: whatever went wrong, the
            # node must still get a chance to enter debug mode below
            self.LOG.write("\n\nImplementation Error\n")
            self.LOG.print_stack()

        if not success:
            try:
                _debugRun()
            except BootManagerException as e:
                self.LOG.write("\n\nException while running: {}\n".format(e))
            except:
                self.LOG.write("\n\nImplementation Error\n")
                traceback.print_exc(file=self.LOG.OutputFile)
                traceback.print_exc()

        return success


def _timestamp():
    """Return the current UTC time formatted for the start/finish entries."""
    return time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())


def main(argv):
    """Run the boot manager and return its exit status.

    Args:
        argv: command line; an optional second element forces the node run
            state and must be one of ``BootManager.NodeRunStates``.

    Returns:
        0 when the run succeeded; 1 when the forced state is invalid, the
        boot manager could not be initialized, the run failed or an
        unexpected exception was raised.
    """
    utils.prompt_for_breakpoint_mode()

    # set to 1 if error occurred
    error = 0

    # all output goes through this class so we can save it and post
    # the data back to PlanetLab central
    LOG = log(BM_NODE_LOG)

    # NOTE: assume CWD is BM's source directory, but never fail
    utils.sysexec_noerr("./setup_bash_history_scripts.sh", LOG)

    LOG.LogEntry("BootManager started at: {}".format(_timestamp()))

    forceState = None
    try:
        if len(argv) == 2:
            fState = argv[1]
            if fState in BootManager.NodeRunStates:
                forceState = fState
            else:
                LOG.LogEntry("FATAL: cannot force node run state to={}".format(fState))
                error = 1
    except:
        traceback.print_exc(file=LOG.OutputFile)
        traceback.print_exc()

    # an invalid forced state skips the run but still uploads the log
    if not error:
        try:
            bm = BootManager(LOG, forceState)
            if bm.CAN_RUN == 0:
                LOG.LogEntry("Unable to initialize BootManager.")
                error = 1
            else:
                LOG.LogEntry("Running version {} of BootManager.".format(bm.VARS['VERSION']))
                if bm.Run():
                    LOG.LogEntry("\nDone!")
                else:
                    LOG.LogEntry("\nError occurred!")
                    error = 1
        except:
            # last line of defence: record the failure and still upload the log
            traceback.print_exc(file=LOG.OutputFile)
            traceback.print_exc()
            error = 1

    LOG.LogEntry("BootManager finished at: {}".format(_timestamp()))
    LOG.Upload()

    return error


if __name__ == "__main__":
    error = main(sys.argv)
    sys.exit(error)