-#!/usr/bin/python2 -u
-
-# ------------------------------------------------------------------------
-# THIS file used to be named alpina.py, from the node installer. Since then
-# the installer has been expanded to include all the functions of the boot
-# manager as well, hence the new name for this file.
-# ------------------------------------------------------------------------
-
+#!/usr/bin/python -u
+#
+# $Id$
+# $URL$
+#
# Copyright (c) 2003 Intel Corporation
# All rights reserved.
-
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-
-# * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following
-# disclaimer in the documentation and/or other materials provided
-# with the distribution.
-
-# * Neither the name of the Intel Corporation nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE INTEL OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
-# YOUR JURISDICTION. It is licensee's responsibility to comply with any
-# export regulations applicable in licensee's jurisdiction. Under
-# CURRENT (May 2000) U.S. export regulations this software is eligible
-# for export from the U.S. and can be downloaded by or otherwise
-# exported or reexported worldwide EXCEPT to U.S. embargoed destinations
-# which include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
-# Afghanistan and any other country to which the U.S. has embargoed
-# goods and services.
-
+#
+# Copyright (c) 2004-2006 The Trustees of Princeton University
+# All rights reserved.
import string
import sys, os, traceback
-from time import gmtime, strftime
-from gzip import GzipFile
+import time
+import gzip
from steps import *
from Exceptions import *
import notify_messages
-
-
+import BootServerRequest
# all output is written to this file
-LOG_FILE= "/tmp/bm.log"
-CURL_PATH= "curl"
-UPLOAD_LOG_URL = "http://boot.planet-lab.org/alpina-logs/upload.php"
+BM_NODE_LOG= "/tmp/bm.log"
+VARS_FILE = "configuration"
# the new contents of PATH when the boot manager is running
BIN_PATH= ('/usr/local/bin',
'/usr/local/sbin',
- '/bin',
- '/sbin',
'/usr/bin',
'/usr/sbin',
- '/usr/local/planetlab/bin')
-
-
-
+ '/bin',
+ '/sbin')
+
+def read_configuration_file(filename):
+ # read in and store all variables in VARS_FILE into vars. each line
+ # is in the format name=val (any whitespace around the = is
+ # removed). everything after the = to the end of the line is
+ # the value
+ vars = {}
+ vars_file= file(filename,'r')
+ validConfFile = True
+ for line in vars_file:
+ # if its a comment or a whitespace line, ignore
+ if line[:1] == "#" or string.strip(line) == "":
+ continue
+
+ parts= string.split(line,"=")
+ if len(parts) != 2:
+ validConfFile = False
+ raise Exception( "Invalid line in vars file: %s" % line )
+
+ name= string.strip(parts[0])
+ value= string.strip(parts[1])
+ value= value.replace("'", "") # remove quotes
+ value= value.replace('"', "") # remove quotes
+ vars[name]= value
+
+ vars_file.close()
+ if not validConfFile:
+ raise Exception( "Unable to read configuration vars." )
+
+ # find out which directory we are running in, and set a variable
+ # for that. future steps may need to get files out of the bootmanager
+ # directory
+ current_dir= os.getcwd()
+ vars['BM_SOURCE_DIR']= current_dir
+
+ return vars
+
+##############################
class log:
+ format="%H:%M:%S(%Z) "
+
def __init__( self, OutputFilePath= None ):
- if OutputFilePath:
- try:
- self.OutputFilePath= OutputFilePath
- self.OutputFile= GzipFile( OutputFilePath, "w", 9 )
- except:
- print( "Unable to open output file for log, continuing" )
- self.OutputFile= None
+ try:
+ self.OutputFile= open( OutputFilePath, "w")
+ self.OutputFilePath= OutputFilePath
+ except:
+ print( "bootmanager log : Unable to open output file %r, continuing"%OutputFilePath )
+ self.OutputFile= None
+ self.VARS = None
+ try:
+ vars = read_configuration_file(VARS_FILE)
+ self.VARS = vars
+ except Exception, e:
+ self.LogEntry( str(e) )
+ return
def LogEntry( self, str, inc_newline= 1, display_screen= 1 ):
+ now=time.strftime(log.format, time.localtime())
if self.OutputFile:
- self.OutputFile.write( str )
+ self.OutputFile.write( now+str )
if display_screen:
- sys.stdout.write( str )
+ sys.stdout.write( now+str )
if inc_newline:
if display_screen:
if self.OutputFile:
self.OutputFile.flush()
-
-
def write( self, str ):
"""
make log behave like a writable file object (for traceback
prints)
"""
self.LogEntry( str, 0, 1 )
-
-
+ # bm log uploading is available again, as of nodeconfig-5.0-2
def Upload( self ):
"""
upload the contents of the log to the server
"""
-
if self.OutputFile is not None:
- self.LogEntry( "Uploading logs to %s" % UPLOAD_LOG_URL )
+ self.OutputFile.flush()
+
+ self.LogEntry( "Uploading logs to %s" % self.VARS['UPLOAD_LOG_SCRIPT'] )
self.OutputFile.close()
self.OutputFile= None
-
- curl_cmd= "%s -s --connect-timeout 60 --max-time 600 " \
- "--form log=@%s %s" % \
- (CURL_PATH, self.OutputFilePath, UPLOAD_LOG_URL)
- os.system( curl_cmd )
-
-
-
+ hostname= self.VARS['INTERFACE_SETTINGS']['hostname'] + "." + \
+ self.VARS['INTERFACE_SETTINGS']['domainname']
+ bs_request = BootServerRequest.BootServerRequest(self.VARS)
+ try:
+ # this was working until f10
+ bs_request.MakeRequest(PartialPath = self.VARS['UPLOAD_LOG_SCRIPT'],
+ GetVars = None, PostVars = None,
+ DoSSL = True, DoCertCheck = True,
+ FormData = ["log=@" + self.OutputFilePath,
+ "hostname=" + hostname,
+ "type=bm.log"])
+ except:
+ # new pycurl
+ import pycurl
+ bs_request.MakeRequest(PartialPath = self.VARS['UPLOAD_LOG_SCRIPT'],
+ GetVars = None, PostVars = None,
+ DoSSL = True, DoCertCheck = True,
+ FormData = [('log',(pycurl.FORM_FILE, self.OutputFilePath)),
+ ("hostname",hostname),
+ ("type","bm.log")])
+##############################
class BootManager:
# file containing initial variables/constants
- VARS_FILE = "configuration"
+ # the set of valid node run states
+ NodeRunStates = {'reinstall':None,
+ 'boot':None,
+ 'safeboot':None,
+ 'disabled':None,
+ }
- def __init__(self, log):
- # this contains a set of information used and updated
- # by each step
- self.VARS= {}
+ def __init__(self, log, forceState):
+ # override machine's current state from the command line
+ self.forceState = forceState
# the main logging point
self.LOG= log
# set to 1 if we can run after initialization
self.CAN_RUN = 0
-
- if not self.ReadBMConf():
- self.LOG.LogEntry( "Unable to read configuration vars." )
- return
-
- # find out which directory we are running it, and set a variable
- # for that. future steps may need to get files out of the bootmanager
- # directory
- current_dir= os.getcwd()
- self.VARS['BM_SOURCE_DIR']= current_dir
+ if log.VARS:
+ # this contains a set of information used and updated by each step
+ self.VARS= log.VARS
+ else:
+ return
+
# not sure what the current PATH is set to, replace it with what
# we know will work with all the boot cds
os.environ['PATH']= string.join(BIN_PATH,":")
-
- self.CAN_RUN= 1
-
-
-
-
- def ReadBMConf(self):
- """
- read in and store all variables in VARS_FILE into
- self.VARS
-
- each line is in the format name=val (any whitespace around
- the = is removed. everything after the = to the end of
- the line is the value
- """
-
- vars_file= file(self.VARS_FILE,'r')
- for line in vars_file:
- # if its a comment or a whitespace line, ignore
- if line[:1] == "#" or string.strip(line) == "":
- continue
-
- parts= string.split(line,"=")
- if len(parts) != 2:
- self.LOG.LogEntry( "Invalid line in vars file: %s" % line )
- return 0
-
- name= string.strip(parts[0])
- value= string.strip(parts[1])
- self.VARS[name]= value
-
- return 1
-
+ self.CAN_RUN= 1
def Run(self):
"""
For exact return values and expected operations, see the comments
at the top of each of the invididual step functions.
"""
-
+
+ def _nodeNotInstalled(message='MSG_NODE_NOT_INSTALLED'):
+ # called by _bootRun() below when node validation fails
+ self.VARS['RUN_LEVEL']= 'failboot'
+ notify = getattr(notify_messages, message)
+ self.VARS['STATE_CHANGE_NOTIFY']= 1
+ self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= notify
+ raise BootManagerException, notify
+
+ def _bootRun():
+ # implements the boot logic, which consists of first
+ # double checking that the node was properly installed,
+ # checking whether someone added or changed disks, and
+ # then finally chain boots.
+
+ # starting the fallback/debug ssh daemon for safety:
+ # if the node install somehow hangs, or if it simply takes ages,
+ # we can still enter and investigate
+ try:
+ StartDebug.Run(self.VARS, self.LOG, last_resort = False)
+ except:
+ pass
+
+ InstallInit.Run( self.VARS, self.LOG )
+ ret = ValidateNodeInstall.Run( self.VARS, self.LOG )
+ if ret == 1:
+ WriteModprobeConfig.Run( self.VARS, self.LOG )
+ WriteNetworkConfig.Run( self.VARS, self.LOG )
+ CheckForNewDisks.Run( self.VARS, self.LOG )
+ SendHardwareConfigToPLC.Run( self.VARS, self.LOG )
+ ChainBootNode.Run( self.VARS, self.LOG )
+ elif ret == -1:
+ _nodeNotInstalled('MSG_NODE_FILESYSTEM_CORRUPT')
+ elif ret == -2:
+ _nodeNotInstalled('MSG_NODE_MOUNT_FAILED')
+ elif ret == -3:
+ _nodeNotInstalled('MSG_NODE_MISSING_KERNEL')
+ else:
+ _nodeNotInstalled()
+
+ def _reinstallRun():
+
+ # starting the fallback/debug ssh daemon for safety:
+ # if the node install somehow hangs, or if it simply takes ages,
+ # we can still enter and investigate
+ try:
+ StartDebug.Run(self.VARS, self.LOG, last_resort = False)
+ except:
+ pass
+
+ # implements the reinstall logic, which will check whether
+ # the min. hardware requirements are met, install the
+ # software, and upon correct installation will switch to
+ # 'boot' state and chainboot into the production system
+ if not CheckHardwareRequirements.Run( self.VARS, self.LOG ):
+ self.VARS['RUN_LEVEL']= 'failboot'
+ raise BootManagerException, "Hardware requirements not met."
+
+ # runinstaller
+ InstallInit.Run( self.VARS, self.LOG )
+ InstallPartitionDisks.Run( self.VARS, self.LOG )
+ InstallBootstrapFS.Run( self.VARS, self.LOG )
+ InstallWriteConfig.Run( self.VARS, self.LOG )
+ InstallUninitHardware.Run( self.VARS, self.LOG )
+ self.VARS['BOOT_STATE']= 'boot'
+ self.VARS['STATE_CHANGE_NOTIFY']= 1
+ self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
+ notify_messages.MSG_INSTALL_FINISHED
+ UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
+ _bootRun()
+
+ def _installRun():
+ # implements the new install logic, which will first check
+ # with the user whether it is ok to install on this
+ # machine, switch to 'reinstall' state and then invoke the reinstall
+ # logic. See the _reinstallRun comments for further
+ # details.
+ if not ConfirmInstallWithUser.Run( self.VARS, self.LOG ):
+ return 0
+ self.VARS['BOOT_STATE']= 'reinstall'
+ UpdateRunLevelWithPLC.Run( self.VARS, self.LOG )
+ _reinstallRun()
+
+ def _debugRun(state='failboot'):
+ # implements debug logic, which starts the sshd and just waits around
+ self.VARS['RUN_LEVEL']=state
+ UpdateRunLevelWithPLC.Run( self.VARS, self.LOG )
+ StartDebug.Run( self.VARS, self.LOG )
+ # fsck/mount fs if present, and ignore return value if it's not.
+ ValidateNodeInstall.Run( self.VARS, self.LOG )
+
+ def _badstateRun():
+ # should never happen; log event
+ self.LOG.write( "\nInvalid BOOT_STATE = %s\n" % self.VARS['BOOT_STATE'])
+ _debugRun()
+
+ # setup state -> function hash table
+ BootManager.NodeRunStates['reinstall'] = _reinstallRun
+ BootManager.NodeRunStates['boot'] = _bootRun
+ BootManager.NodeRunStates['safeboot'] = lambda : _debugRun('safeboot')
+ BootManager.NodeRunStates['disabled'] = lambda : _debugRun('disabled')
+
+ success = 0
try:
InitializeBootManager.Run( self.VARS, self.LOG )
ReadNodeConfiguration.Run( self.VARS, self.LOG )
AuthenticateWithPLC.Run( self.VARS, self.LOG )
+ StartRunlevelAgent.Run( self.VARS, self.LOG )
GetAndUpdateNodeDetails.Run( self.VARS, self.LOG )
-
- if self.VARS['BOOT_STATE'] == 'new' or \
- self.VARS['BOOT_STATE'] == 'inst':
- if not ConfirmInstallWithUser.Run( self.VARS, self.LOG ):
- return 0
-
- self.VARS['BOOT_STATE']= 'rins'
+
+ # override machine's current state from the command line
+ if self.forceState is not None:
+ self.VARS['BOOT_STATE']= self.forceState
UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
-
- if not CheckHardwareRequirements.Run( self.VARS, self.LOG ):
- self.VARS['BOOT_STATE']= 'dbg'
- UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
- raise BootManagerException, "Hardware requirements not met."
-
- self.RunInstaller()
-
- if ValidateNodeInstall.Run( self.VARS, self.LOG ):
- SendHardwareConfigToPLC.Run( self.VARS, self.LOG )
- ChainBootNode.Run( self.VARS, self.LOG )
- else:
- self.VARS['BOOT_STATE']= 'dbg'
- self.VARS['STATE_CHANGE_NOTIFY']= 1
- self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
- notify_messages.MSG_NODE_NOT_INSTALLED
- UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
-
-
- elif self.VARS['BOOT_STATE'] == 'rins':
- if not CheckHardwareRequirements.Run( self.VARS, self.LOG ):
- self.VARS['BOOT_STATE']= 'dbg'
- UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
- raise BootManagerException, "Hardware requirements not met."
-
- self.RunInstaller()
-
- if ValidateNodeInstall.Run( self.VARS, self.LOG ):
- SendHardwareConfigToPLC.Run( self.VARS, self.LOG )
- ChainBootNode.Run( self.VARS, self.LOG )
- else:
- self.VARS['BOOT_STATE']= 'dbg'
- self.VARS['STATE_CHANGE_NOTIFY']= 1
- self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
- notify_messages.MSG_NODE_NOT_INSTALLED
- UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
-
- elif self.VARS['BOOT_STATE'] == 'boot':
- if ValidateNodeInstall.Run( self.VARS, self.LOG ):
- UpdateNodeConfiguration.Run( self.VARS, self.LOG )
- CheckForNewDisks.Run( self.VARS, self.LOG )
- SendHardwareConfigToPLC.Run( self.VARS, self.LOG )
- ChainBootNode.Run( self.VARS, self.LOG )
- else:
- self.VARS['BOOT_STATE']= 'dbg'
- self.VARS['STATE_CHANGE_NOTIFY']= 1
- self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
- notify_messages.MSG_NODE_NOT_INSTALLED
- UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
-
- elif self.VARS['BOOT_STATE'] == 'dbg':
- StartDebug.Run( self.VARS, self.LOG )
+ UpdateRunLevelWithPLC.Run( self.VARS, self.LOG )
+
+ stateRun = BootManager.NodeRunStates.get(self.VARS['BOOT_STATE'],_badstateRun)
+ stateRun()
+ success = 1
except KeyError, e:
self.LOG.write( "\n\nKeyError while running: %s\n" % str(e) )
except BootManagerException, e:
self.LOG.write( "\n\nException while running: %s\n" % str(e) )
-
- return 1
-
+ except BootManagerAuthenticationException, e:
+ self.LOG.write( "\n\nFailed to Authenticate Node: %s\n" % str(e) )
+ # sets /tmp/CANCEL_BOOT flag
+ StartDebug.Run(self.VARS, self.LOG )
+ # Return immediately b/c any other calls to API will fail
+ return success
+ except:
+ self.LOG.write( "\n\nImplementation Error\n")
+ traceback.print_exc(file=self.LOG.OutputFile)
+ traceback.print_exc()
+
+ if not success:
+ try:
+ _debugRun()
+ except BootManagerException, e:
+ self.LOG.write( "\n\nException while running: %s\n" % str(e) )
+ except:
+ self.LOG.write( "\n\nImplementation Error\n")
+ traceback.print_exc(file=self.LOG.OutputFile)
+ traceback.print_exc()
+ return success
- def RunInstaller(self):
- """
- since the installer can be invoked at more than one place
- in the boot manager logic, seperate the steps necessary
- to do it here
- """
-
- InstallInit.Run( self.VARS, self.LOG )
- InstallPartitionDisks.Run( self.VARS, self.LOG )
- InstallBootstrapRPM.Run( self.VARS, self.LOG )
- InstallWriteConfig.Run( self.VARS, self.LOG )
- InstallBuildVServer.Run( self.VARS, self.LOG )
- InstallNodeInit.Run( self.VARS, self.LOG )
- InstallUninitHardware.Run( self.VARS, self.LOG )
-
- self.VARS['BOOT_STATE']= 'boot'
- self.VARS['STATE_CHANGE_NOTIFY']= 1
- self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
- notify_messages.MSG_INSTALL_FINISHED
- UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
+
+def main(argv):
- SendHardwareConfigToPLC.Run( self.VARS, self.LOG )
+ import utils
+ utils.prompt_for_breakpoint_mode()
+ utils.breakpoint ("Entering BootManager::main")
-
-if __name__ == "__main__":
-
- # set to 0 if no error occurred
- error= 1
+ # set to 1 if error occurred
+ error= 0
# all output goes through this class so we can save it and post
# the data back to PlanetLab central
- LOG= log( LOG_FILE )
+ LOG= log( BM_NODE_LOG )
LOG.LogEntry( "BootManager started at: %s" % \
- strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) )
+ time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )
+
+ try:
+ forceState = None
+ if len(argv) == 2:
+ fState = argv[1]
+ if BootManager.NodeRunStates.has_key(fState):
+ forceState = fState
+ else:
+ LOG.LogEntry("FATAL: cannot force node run state to=%s" % fState)
+ error = 1
+ except:
+ traceback.print_exc(file=LOG.OutputFile)
+ traceback.print_exc()
+
+ if error:
+ LOG.LogEntry( "BootManager finished at: %s" % \
+ time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )
+ LOG.Upload()
+ return error
try:
- bm= BootManager(LOG)
+ bm= BootManager(LOG,forceState)
if bm.CAN_RUN == 0:
LOG.LogEntry( "Unable to initialize BootManager." )
else:
- LOG.LogEntry( "Running version %s of BootManager." %
- bm.VARS['VERSION'] )
+ LOG.LogEntry( "Running version %s of BootManager." % bm.VARS['VERSION'] )
success= bm.Run()
if success:
LOG.LogEntry( "\nDone!" );
else:
LOG.LogEntry( "\nError occurred!" );
-
+ error = 1
except:
traceback.print_exc(file=LOG.OutputFile)
traceback.print_exc()
LOG.LogEntry( "BootManager finished at: %s" % \
- strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) )
-
+ time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )
LOG.Upload()
+
+ return error
+
+if __name__ == "__main__":
+ error = main(sys.argv)
sys.exit(error)