X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=source%2FBootManager.py;h=7460fd8b13b26fbcd01107a8ec411181d280561b;hb=4eea76a0545774fae808b29d94227be100872bf7;hp=1439549815b4ec1ef3c3ff5fc9fadd62d1d5947a;hpb=e619ab76850eebe70f75ee9b6a7d320f7a3092ce;p=bootmanager.git diff --git a/source/BootManager.py b/source/BootManager.py index 1439549..7460fd8 100755 --- a/source/BootManager.py +++ b/source/BootManager.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2 -u +#!/usr/bin/python -u # Copyright (c) 2003 Intel Corporation # All rights reserved. @@ -8,8 +8,8 @@ import string import sys, os, traceback -from time import gmtime, strftime -from gzip import GzipFile +import time +import gzip from steps import * from Exceptions import * @@ -17,39 +17,36 @@ import notify_messages import BootServerRequest # all output is written to this file -LOG_FILE= "/tmp/bm.log" -UPLOAD_LOG_PATH = "/alpina-logs/upload.php" +BM_NODE_LOG= "/tmp/bm.log" +UPLOAD_LOG_SCRIPT = "/boot/upload-bmlog.php" # the new contents of PATH when the boot manager is running BIN_PATH= ('/usr/local/bin', '/usr/local/sbin', - '/bin', - '/sbin', '/usr/bin', '/usr/sbin', - '/usr/local/planetlab/bin') + '/bin', + '/sbin') - -# the set of valid node run states -NodeRunStates = {} - +############################## class log: - def __init__( self, OutputFilePath= None ): - if OutputFilePath: - try: - self.OutputFilePath= OutputFilePath - self.OutputFile= GzipFile( OutputFilePath, "w", 9 ) - except: - print( "Unable to open output file for log, continuing" ) - self.OutputFile= None + format="%H:%M:%S(%Z) " + def __init__( self, OutputFilePath= None ): + try: + self.OutputFile= open( OutputFilePath, "w") + self.OutputFilePath= OutputFilePath + except: + print( "bootmanager log : Unable to open output file %r, continuing"%OutputFilePath ) + self.OutputFile= None def LogEntry( self, str, inc_newline= 1, display_screen= 1 ): + now=time.strftime(log.format, time.localtime()) if self.OutputFile: - self.OutputFile.write( str ) + self.OutputFile.write( now+str ) if display_screen: - sys.stdout.write( str ) + sys.stdout.write( now+str ) if inc_newline: if display_screen: @@ -60,44 +57,44 @@ class log: if self.OutputFile: self.OutputFile.flush() - - def write( self, str ): """ make log behave like a writable file object (for traceback prints) """ self.LogEntry( str, 0, 1 ) - - + # bm log uploading is available back again, as of nodeconfig-5.0-2 def Upload( self ): """ upload the contents of the log to the server """ - if self.OutputFile is not None: - self.LogEntry( "Uploading logs to %s" % UPLOAD_LOG_PATH ) + self.OutputFile.flush() + + self.LogEntry( "Uploading logs to %s" % UPLOAD_LOG_SCRIPT ) self.OutputFile.close() self.OutputFile= None bs_request = BootServerRequest.BootServerRequest() - bs_request.MakeRequest(PartialPath = UPLOAD_LOG_PATH, + bs_request.MakeRequest(PartialPath = UPLOAD_LOG_SCRIPT, GetVars = None, PostVars = None, FormData = ["log=@" + self.OutputFilePath], DoSSL = True, DoCertCheck = True) - - - - - +############################## class BootManager: # file containing initial variables/constants VARS_FILE = "configuration" + # the set of valid node run states + NodeRunStates = {'reinstall':None, + 'boot':None, + 'safeboot':None, + 'disabled':None, + } def __init__(self, log, forceState): # override machine's current state from the command line @@ -146,8 +143,7 @@ class BootManager: # we know will work with all the boot cds os.environ['PATH']= string.join(BIN_PATH,":") - # this contains a set of information used and updated - # by each step + # this contains a set of information used and updated by each step self.VARS= vars self.CAN_RUN= 1 @@ -175,7 +171,7 @@ class BootManager: def _nodeNotInstalled(): # called by the _xxxState() functions below upon failure - self.VARS['BOOT_STATE']= 'failboot' + self.VARS['RUN_LEVEL']= 'failboot' self.VARS['STATE_CHANGE_NOTIFY']= 1 self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \ notify_messages.MSG_NODE_NOT_INSTALLED @@ -188,6 +184,14 @@ class BootManager: # checking whether someone added or changed disks, and # then finally chain boots. + # starting the fallback/debug ssh daemon for safety: + # if the node install somehow hangs, or if it simply takes ages, + # we can still enter and investigate + try: + StartDebug.Run(self.VARS, self.LOG, last_resort = False) + except: + pass + InstallInit.Run( self.VARS, self.LOG ) if ValidateNodeInstall.Run( self.VARS, self.LOG ): WriteModprobeConfig.Run( self.VARS, self.LOG ) @@ -200,12 +204,21 @@ class BootManager: _nodeNotInstalled() def _reinstallRun(): + + # starting the fallback/debug ssh daemon for safety: + # if the node install somehow hangs, or if it simply takes ages, + # we can still enter and investigate + try: + StartDebug.Run(self.VARS, self.LOG, last_resort = False) + except: + pass + # implements the reinstall logic, which will check whether # the min. hardware requirements are met, install the # software, and upon correct installation will switch too # 'boot' state and chainboot into the production system if not CheckHardwareRequirements.Run( self.VARS, self.LOG ): - self.VARS['BOOT_STATE']= 'failboot' + self.VARS['RUN_LEVEL']= 'failboot' raise BootManagerException, "Hardware requirements not met." # runinstaller @@ -221,7 +234,7 @@ class BootManager: UpdateBootStateWithPLC.Run( self.VARS, self.LOG ) _bootRun() - def _newRun(): + def _installRun(): # implements the new install logic, which will first check # with the user whether it is ok to install on this # machine, switch to 'reinstall' state and then invoke the reinstall @@ -230,43 +243,43 @@ class BootManager: if not ConfirmInstallWithUser.Run( self.VARS, self.LOG ): return 0 self.VARS['BOOT_STATE']= 'reinstall' - UpdateBootStateWithPLC.Run( self.VARS, self.LOG ) + UpdateRunLevelWithPLC.Run( self.VARS, self.LOG ) _reinstallRun() def _debugRun(state='failboot'): - # implements debug logic, which just starts the sshd - # and just waits around - self.VARS['BOOT_STATE']=state - UpdateBootStateWithPLC.Run( self.VARS, self.LOG ) + # implements debug logic, which starts the sshd and just waits around + self.VARS['RUN_LEVEL']=state + UpdateRunLevelWithPLC.Run( self.VARS, self.LOG ) StartDebug.Run( self.VARS, self.LOG ) + # fsck/mount fs if present, and ignore return value if it's not. + ValidateNodeInstall.Run( self.VARS, self.LOG ) - def _badRun(): + def _badstateRun(): # should never happen; log event self.LOG.write( "\nInvalid BOOT_STATE = %s\n" % self.VARS['BOOT_STATE']) _debugRun() - global NodeRunStates # setup state -> function hash table - NodeRunStates['install'] = _newRun - NodeRunStates['reinstall'] = _reinstallRun - NodeRunStates['boot'] = _bootRun - NodeRunStates['failboot'] = _bootRun # should always try to boot. - NodeRunStates['safeboot'] = lambda : _debugRun('safeboot') - NodeRunStates['disabled'] = lambda : _debugRun('disabled') + BootManager.NodeRunStates['reinstall'] = _reinstallRun + BootManager.NodeRunStates['boot'] = _bootRun + BootManager.NodeRunStates['safeboot'] = lambda : _debugRun('safeboot') + BootManager.NodeRunStates['disabled'] = lambda : _debugRun('disabled') success = 0 try: InitializeBootManager.Run( self.VARS, self.LOG ) ReadNodeConfiguration.Run( self.VARS, self.LOG ) AuthenticateWithPLC.Run( self.VARS, self.LOG ) + StartRunlevelAgent.Run( self.VARS, self.LOG ) GetAndUpdateNodeDetails.Run( self.VARS, self.LOG ) # override machine's current state from the command line if self.forceState is not None: self.VARS['BOOT_STATE']= self.forceState UpdateBootStateWithPLC.Run( self.VARS, self.LOG ) + UpdateRunLevelWithPLC.Run( self.VARS, self.LOG ) - stateRun = NodeRunStates.get(self.VARS['BOOT_STATE'],_badRun) + stateRun = BootManager.NodeRunStates.get(self.VARS['BOOT_STATE'],_badstateRun) stateRun() success = 1 @@ -297,31 +310,23 @@ def main(argv): import utils utils.prompt_for_breakpoint_mode() - #utils.breakpoint ("Entering BootManager::main") + utils.breakpoint ("Entering BootManager::main") - global NodeRunStates - NodeRunStates = {'install':None, - 'reinstall':None, - 'boot':None, - 'safeboot':None, - 'failboot':None, - 'disabled':None, } - # set to 1 if error occurred error= 0 # all output goes through this class so we can save it and post # the data back to PlanetLab central - LOG= log( LOG_FILE ) + LOG= log( BM_NODE_LOG ) LOG.LogEntry( "BootManager started at: %s" % \ - strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) ) + time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) ) try: forceState = None if len(argv) == 2: fState = argv[1] - if NodeRunStates.has_key(fState): + if BootManager.NodeRunStates.has_key(fState): forceState = fState else: LOG.LogEntry("FATAL: cannot force node run state to=%s" % fState) @@ -332,7 +337,7 @@ def main(argv): if error: LOG.LogEntry( "BootManager finished at: %s" % \ - strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) ) + time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) ) LOG.Upload() return error @@ -341,8 +346,7 @@ def main(argv): if bm.CAN_RUN == 0: LOG.LogEntry( "Unable to initialize BootManager." ) else: - LOG.LogEntry( "Running version %s of BootManager." % - bm.VARS['VERSION'] ) + LOG.LogEntry( "Running version %s of BootManager." % bm.VARS['VERSION'] ) success= bm.Run() if success: LOG.LogEntry( "\nDone!" ); @@ -354,7 +358,7 @@ def main(argv): traceback.print_exc() LOG.LogEntry( "BootManager finished at: %s" % \ - strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) ) + time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) ) LOG.Upload() return error