oops

[bootmanager.git] / source / BootManager.py
diff --git a/source/BootManager.py b/source/BootManager.py

index 46b5759..2f4f74f 100755 (executable)
--- a/source/BootManager.py
+++ b/source/BootManager.py
@@ -1,13 +1,14 @@
  #!/usr/bin/python -u
-
+#
  # Copyright (c) 2003 Intel Corporation
  # All rights reserved.
  #
  # Copyright (c) 2004-2006 The Trustees of Princeton University
  # All rights reserved.
  
+import sys, os
+import traceback
  import string
-import sys, os, traceback
  import time
  import gzip
  
@@ -15,87 +16,158 @@ from steps import *
  from Exceptions import *
  import notify_messages
  import BootServerRequest
+import utils
  
  # all output is written to this file
-BM_NODE_LOG= "/tmp/bm.log"
-UPLOAD_LOG_SCRIPT = "/boot/upload-bmlog.php"
+BM_NODE_LOG = "/tmp/bm.log"
+VARS_FILE = "configuration"
  
  # the new contents of PATH when the boot manager is running
-BIN_PATH= ('/usr/local/bin',
-           '/usr/local/sbin',
-           '/usr/bin',
-           '/usr/sbin',
-           '/bin',
-           '/sbin')
-           
+BIN_PATH = ('/usr/local/bin',
+            '/usr/local/sbin',
+            '/usr/bin',
+            '/usr/sbin',
+            '/bin',
+            '/sbin')
+
+def read_configuration_file(filename):
+    """
+    Parse a name=value configuration file and return it as a dict.
+
+    Blank lines and lines starting with '#' are skipped.  Whitespace
+    around the first '=' is removed, everything after it up to the end
+    of the line is the value, and single/double quotes are dropped
+    from the value.  BM_SOURCE_DIR is set to the current working
+    directory, since later steps may need files from the bootmanager
+    directory.  Raises Exception on a line that has no '='.
+    """
+    vars = {}
+    # 'with' closes the file even when an invalid line raises below
+    with open(filename, 'r') as vars_file:
+        for line in vars_file:
+            # if its a comment or a whitespace line, ignore
+            if line[:1] == "#" or string.strip(line) == "":
+                continue
+
+            # split on the first = only, so that values may contain =
+            parts = string.split(line, "=", 1)
+            if len(parts) != 2:
+                raise Exception("Invalid line in vars file: {}".format(line))
+
+            name = string.strip(parts[0])
+            value = string.strip(parts[1])
+            value = value.replace("'", "")   # remove quotes
+            value = value.replace('"', "")   # remove quotes
+            vars[name] = value
+
+    # remember which directory we are running in
+    current_dir = os.getcwd()
+    vars['BM_SOURCE_DIR'] = current_dir
+
+    return vars
+
  ##############################
  class log:
  
-    format="%H:%M:%S(%Z) "
+    format = "%H:%M:%S(%Z) "
  
-    def __init__( self, OutputFilePath= None ):
+    def __init__(self, OutputFilePath=None):
          try:
-            self.OutputFile= open( OutputFilePath, "w")
-            self.OutputFilePath= OutputFilePath
+            self.OutputFile = open(OutputFilePath, "w")
+            self.OutputFilePath = OutputFilePath
          except:
-            print( "bootmanager log : Unable to open output file %r, continuing"%OutputFilePath )
-            self.OutputFile= None
+            print("bootmanager log : Unable to open output file {}, continuing"\
+                  .format(OutputFilePath))
+            self.OutputFile = None
+
+        self.VARS = None
+        try:
+            vars = read_configuration_file(VARS_FILE)
+            self.VARS = vars
+        except Exception, e:
+            self.LogEntry(str(e))
+            return
      
-    def LogEntry( self, str, inc_newline= 1, display_screen= 1 ):
-        now=time.strftime(log.format, time.localtime())
+    def LogEntry(self, str, inc_newline = 1, display_screen = 1):
+        now = time.strftime(log.format, time.localtime())
          if self.OutputFile:
-            self.OutputFile.write( now+str )
+            self.OutputFile.write(now + str)
          if display_screen:
-            sys.stdout.write( now+str )
+            sys.stdout.write(now + str)
              
          if inc_newline:
              if display_screen:
-                sys.stdout.write( "\n" )
+                sys.stdout.write("\n")
              if self.OutputFile:
-                self.OutputFile.write( "\n" )
+                self.OutputFile.write("\n")
  
          if self.OutputFile:
              self.OutputFile.flush()
  
-    def write( self, str ):
+    def write(self, str):
          """
          make log behave like a writable file object (for traceback
          prints)
          """
-        self.LogEntry( str, 0, 1 )
+        self.LogEntry(str, 0, 1)
      
+    def print_stack(self):
+        """
+        dump current stack in log
+        """
+        self.write(traceback.format_exc())
+
      # bm log uploading is available back again, as of nodeconfig-5.0-2
-    def Upload( self ):
+    def Upload(self, extra_file=None):
          """
          upload the contents of the log to the server
          """
          if self.OutputFile is not None:
              self.OutputFile.flush()
  
-            self.LogEntry( "Uploading logs to %s" % UPLOAD_LOG_SCRIPT )
+            self.LogEntry("Uploading logs to {}".format(self.VARS['UPLOAD_LOG_SCRIPT']))
              
              self.OutputFile.close()
-            self.OutputFile= None
+            self.OutputFile = None
+
+            hostname = self.VARS['INTERFACE_SETTINGS']['hostname'] + "." + \
+                       self.VARS['INTERFACE_SETTINGS']['domainname']
+            bs_request = BootServerRequest.BootServerRequest(self.VARS)
+            try:
+                # this was working until f10
+                bs_request.MakeRequest(PartialPath = self.VARS['UPLOAD_LOG_SCRIPT'],
+                                       GetVars = None, PostVars = None,
+                                       DoSSL = True, DoCertCheck = True,
+                                       FormData = ["log=@" + self.OutputFilePath,
+                                                   "hostname=" + hostname, 
+                                                   "type=bm.log"])
+            except:
+                # new pycurl
+                import pycurl
+                bs_request.MakeRequest(PartialPath = self.VARS['UPLOAD_LOG_SCRIPT'],
+                                       GetVars = None, PostVars = None,
+                                       DoSSL = True, DoCertCheck = True,
+                                       FormData = [('log',(pycurl.FORM_FILE, self.OutputFilePath)),
+                                                   ("hostname",hostname),
+                                                   ("type","bm.log")])
+        if extra_file is not None:
+            # NOTE: for code-reuse, invoke the bash function 'upload_logs';
+            # by adding --login, bash reads .bash_profile before execution.
+            # Also, never fail, since this is an optional feature.
+            utils.sysexec_noerr("""bash --login -c "upload_logs {}" """.format(extra_file), self)
  
-            bs_request = BootServerRequest.BootServerRequest()
-            bs_request.MakeRequest(PartialPath = UPLOAD_LOG_SCRIPT,
-                                   GetVars = None, PostVars = None,
-                                   FormData = ["log=@" + self.OutputFilePath],
-                                   DoSSL = True, DoCertCheck = True)
  
  ##############################
  class BootManager:
  
      # file containing initial variables/constants
-    VARS_FILE = "configuration"
  
      # the set of valid node run states
-    NodeRunStates = {'install':None,
-                     'reinstall':None,
-                     'boot':None,
-                     'failboot':None,
-                     'safeboot':None,
-                     'disabled':None,
+    NodeRunStates = {'reinstall' : None,
+                     'upgrade' : None,
+                     'boot' : None,
+                     'safeboot' : None,
+                     'disabled' : None,
                       }
      
      def __init__(self, log, forceState):
@@ -103,52 +175,22 @@ class BootManager:
          self.forceState = forceState
  
          # the main logging point
-        self.LOG= log
+        self.LOG = log
  
          # set to 1 if we can run after initialization
          self.CAN_RUN = 0
-             
-        # read in and store all variables in VARS_FILE into each line
-        # is in the format name=val (any whitespace around the = is
-        # removed. everything after the = to the end of the line is
-        # the value
-        vars = {}
-        vars_file= file(self.VARS_FILE,'r')
-        validConfFile = True
-        for line in vars_file:
-            # if its a comment or a whitespace line, ignore
-            if line[:1] == "#" or string.strip(line) == "":
-                continue
-
-            parts= string.split(line,"=")
-            if len(parts) != 2:
-                self.LOG.LogEntry( "Invalid line in vars file: %s" % line )
-                validConfFile = False
-                break
-
-            name= string.strip(parts[0])
-            value= string.strip(parts[1])
-            vars[name]= value
-
-        vars_file.close()
-        if not validConfFile:
-            self.LOG.LogEntry( "Unable to read configuration vars." )
-            return
-
-        # find out which directory we are running it, and set a variable
-        # for that. future steps may need to get files out of the bootmanager
-        # directory
-        current_dir= os.getcwd()
-        vars['BM_SOURCE_DIR']= current_dir
  
+        if log.VARS:
+            # this contains a set of information used and updated by each step
+            self.VARS = log.VARS
+        else:
+            return
+             
          # not sure what the current PATH is set to, replace it with what
          # we know will work with all the boot cds
-        os.environ['PATH']= string.join(BIN_PATH,":")
-                   
-        # this contains a set of information used and updated by each step
-        self.VARS= vars
+        os.environ['PATH'] = string.join(BIN_PATH,":")
  
-        self.CAN_RUN= 1
+        self.CAN_RUN = 1
  
      def Run(self):
          """
@@ -165,20 +207,19 @@ class BootManager:
          If requriements not met, but tests were succesfull, return 0.
  
          for steps that run within the installer, they are expected to either
-        complete succesfully and return 1, or throw an execption.
+        complete successfully and return 1, or throw an exception.
  
          For exact return values and expected operations, see the comments
          at the top of each of the invididual step functions.
          """
  
-        def _nodeNotInstalled():
+        def _nodeNotInstalled(message='MSG_NODE_NOT_INSTALLED'):
              # called by the _xxxState() functions below upon failure
-            self.VARS['BOOT_STATE']= 'failboot'
-            self.VARS['STATE_CHANGE_NOTIFY']= 1
-            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
-                      notify_messages.MSG_NODE_NOT_INSTALLED
-            raise BootManagerException, \
-                  notify_messages.MSG_NODE_NOT_INSTALLED
+            self.VARS['RUN_LEVEL'] = 'failboot'
+            notify = getattr(notify_messages, message)
+            self.VARS['STATE_CHANGE_NOTIFY'] = 1
+            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE'] = notify
+            raise BootManagerException, notify
  
          def _bootRun():
              # implements the boot logic, which consists of first
@@ -194,18 +235,30 @@ class BootManager:
              except:
                  pass
  
-            InstallInit.Run( self.VARS, self.LOG )                    
-            if ValidateNodeInstall.Run( self.VARS, self.LOG ):
-                WriteModprobeConfig.Run( self.VARS, self.LOG )
-                MakeInitrd.Run( self.VARS, self.LOG )
-                WriteNetworkConfig.Run( self.VARS, self.LOG )
-                CheckForNewDisks.Run( self.VARS, self.LOG )
-                SendHardwareConfigToPLC.Run( self.VARS, self.LOG )
-                ChainBootNode.Run( self.VARS, self.LOG )
+            InstallInit.Run(self.VARS, self.LOG)                    
+            ret = ValidateNodeInstall.Run(self.VARS, self.LOG)
+            if ret == 1:
+# Thierry - feb. 2013 turning off WriteModprobeConfig for now on lxc
+# for one thing this won't work at all with f18, as modules.pcimap
+# has disappeared (Daniel suggested modules.aliases could be used instead)
+# and second, in any case it's been years now that modprobe.conf was deprecated
+# so most likely this code has no actual effect
+                if self.VARS['virt'] == 'vs':
+                    WriteModprobeConfig.Run(self.VARS, self.LOG)
+                WriteNetworkConfig.Run(self.VARS, self.LOG)
+                CheckForNewDisks.Run(self.VARS, self.LOG)
+                SendHardwareConfigToPLC.Run(self.VARS, self.LOG)
+                ChainBootNode.Run(self.VARS, self.LOG)
+            elif ret == -1:
+                _nodeNotInstalled('MSG_NODE_FILESYSTEM_CORRUPT')
+            elif ret == -2:
+                _nodeNotInstalled('MSG_NODE_MOUNT_FAILED')
+            elif ret == -3:
+                _nodeNotInstalled('MSG_NODE_MISSING_KERNEL')
              else:
                  _nodeNotInstalled()
  
-        def _reinstallRun():
+        def _reinstallRun(upgrade=False):
  
              # starting the fallback/debug ssh daemon for safety:
              # if the node install somehow hangs, or if it simply takes ages, 
@@ -219,21 +272,23 @@ class BootManager:
              # the min. hardware requirements are met, install the
              # software, and upon correct installation will switch too
              # 'boot' state and chainboot into the production system
-            if not CheckHardwareRequirements.Run( self.VARS, self.LOG ):
-                self.VARS['BOOT_STATE']= 'failboot'
+            if not CheckHardwareRequirements.Run(self.VARS, self.LOG):
+                self.VARS['RUN_LEVEL'] = 'failboot'
                  raise BootManagerException, "Hardware requirements not met."
  
              # runinstaller
-            InstallInit.Run( self.VARS, self.LOG )                    
-            InstallPartitionDisks.Run( self.VARS, self.LOG )            
-            InstallBootstrapFS.Run( self.VARS, self.LOG )            
-            InstallWriteConfig.Run( self.VARS, self.LOG )
-            InstallUninitHardware.Run( self.VARS, self.LOG )
-            self.VARS['BOOT_STATE']= 'boot'
-            self.VARS['STATE_CHANGE_NOTIFY']= 1
-            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
+            InstallInit.Run(self.VARS, self.LOG)                    
+            if not upgrade:
+                InstallPartitionDisks.Run(self.VARS, self.LOG)            
+            InstallBootstrapFS.Run(self.VARS, self.LOG)            
+            InstallWriteConfig.Run(self.VARS, self.LOG)
+            InstallUninitHardware.Run(self.VARS, self.LOG)
+            self.VARS['BOOT_STATE'] = 'boot'
+            self.VARS['STATE_CHANGE_NOTIFY'] = 1
+            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE'] = \
                   notify_messages.MSG_INSTALL_FINISHED
-            UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
+            AnsibleHook.Run(self.VARS, self.LOG)
+            UpdateBootStateWithPLC.Run(self.VARS, self.LOG)
              _bootRun()
              
          def _installRun():
@@ -242,63 +297,74 @@ class BootManager:
              # machine, switch to 'reinstall' state and then invoke the reinstall
              # logic.  See reinstallState logic comments for further
              # details.
-            if not ConfirmInstallWithUser.Run( self.VARS, self.LOG ):
+            if not ConfirmInstallWithUser.Run(self.VARS, self.LOG):
                  return 0
-            self.VARS['BOOT_STATE']= 'reinstall'
-            UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
+            self.VARS['BOOT_STATE'] = 'reinstall'
+
+            AnsibleHook.Run(self.VARS, self.LOG)
              _reinstallRun()
  
          def _debugRun(state='failboot'):
              # implements debug logic, which starts the sshd and just waits around
-            self.VARS['BOOT_STATE']=state
-            UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
-            StartDebug.Run( self.VARS, self.LOG )
+            self.VARS['RUN_LEVEL'] = state
+            StartDebug.Run(self.VARS, self.LOG)
+            # fsck/mount fs if present, and ignore return value if it's not.
+            ValidateNodeInstall.Run(self.VARS, self.LOG)
  
          def _badstateRun():
              # should never happen; log event
-            self.LOG.write( "\nInvalid BOOT_STATE = %s\n" % self.VARS['BOOT_STATE'])
+            self.LOG.write("\nInvalid BOOT_STATE = {}\n".format(self.VARS['BOOT_STATE']))
              _debugRun()
  
          # setup state -> function hash table
-        BootManager.NodeRunStates['install']    = _installRun
-        BootManager.NodeRunStates['reinstall']  = _reinstallRun
+        BootManager.NodeRunStates['reinstall']  = lambda : _reinstallRun(upgrade=False)
+        BootManager.NodeRunStates['upgrade']    = lambda : _reinstallRun(upgrade=True)
          BootManager.NodeRunStates['boot']       = _bootRun
-        BootManager.NodeRunStates['failboot']   = _bootRun   # should always try to boot.
          BootManager.NodeRunStates['safeboot']   = lambda : _debugRun('safeboot')
          BootManager.NodeRunStates['disabled']   = lambda : _debugRun('disabled')
  
          success = 0
          try:
-            InitializeBootManager.Run( self.VARS, self.LOG )
-            ReadNodeConfiguration.Run( self.VARS, self.LOG )
-            AuthenticateWithPLC.Run( self.VARS, self.LOG )
-            GetAndUpdateNodeDetails.Run( self.VARS, self.LOG )
+            InitializeBootManager.Run(self.VARS, self.LOG)
+            ReadNodeConfiguration.Run(self.VARS, self.LOG)
+            AuthenticateWithPLC.Run(self.VARS, self.LOG)
+            UpdateLastBootOnce.Run(self.VARS, self.LOG)
+            StartRunlevelAgent.Run(self.VARS, self.LOG)
+            GetAndUpdateNodeDetails.Run(self.VARS, self.LOG)
  
              # override machine's current state from the command line
              if self.forceState is not None:
-                self.VARS['BOOT_STATE']= self.forceState
-                UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
+                self.VARS['BOOT_STATE'] = self.forceState
+                UpdateBootStateWithPLC.Run(self.VARS, self.LOG)
  
-            stateRun = BootManager.NodeRunStates.get(self.VARS['BOOT_STATE'],_badstateRun)
+            stateRun = BootManager.NodeRunStates.get(self.VARS['BOOT_STATE'], _badstateRun)
              stateRun()
              success = 1
  
-        except KeyError, e:
-            self.LOG.write( "\n\nKeyError while running: %s\n" % str(e) )
-        except BootManagerException, e:
-            self.LOG.write( "\n\nException while running: %s\n" % str(e) )
+        except KeyError as e:
+            self.LOG.write("\n\nKeyError while running: {}\n".format(e))
+            self.LOG.print_stack ()
+        except BootManagerException as e:
+            self.LOG.write("\n\nException while running: {}\n".format(e))
+            self.LOG.print_stack ()
+        except BootManagerAuthenticationException as e:
+            self.LOG.write("\n\nFailed to Authenticate Node: {}\n".format(e))
+            self.LOG.print_stack ()
+            # sets /tmp/CANCEL_BOOT flag
+            StartDebug.Run(self.VARS, self.LOG)
+            # Return immediately b/c any other calls to API will fail
+            return success
          except:
-            self.LOG.write( "\n\nImplementation Error\n")
-            traceback.print_exc(file=self.LOG.OutputFile)
-            traceback.print_exc()
+            self.LOG.write("\n\nImplementation Error\n")
+            self.LOG.print_stack ()
  
          if not success:
              try:
                  _debugRun()
              except BootManagerException, e:
-                self.LOG.write( "\n\nException while running: %s\n" % str(e) )
+                self.LOG.write("\n\nException while running: {}\n".format(e))
              except:
-                self.LOG.write( "\n\nImplementation Error\n")
+                self.LOG.write("\n\nImplementation Error\n")
                  traceback.print_exc(file=self.LOG.OutputFile)
                  traceback.print_exc()
  
@@ -310,17 +376,20 @@ def main(argv):
      import utils
      utils.prompt_for_breakpoint_mode()
  
-    utils.breakpoint ("Entering BootManager::main")
+#    utils.breakpoint ("Entering BootManager::main")
      
      # set to 1 if error occurred
-    error= 0
+    error = 0
      
      # all output goes through this class so we can save it and post
      # the data back to PlanetLab central
-    LOG= log( BM_NODE_LOG )
+    LOG = log(BM_NODE_LOG)
+
+    # NOTE: assume CWD is BM's source directory, but never fail
+    utils.sysexec_noerr("./setup_bash_history_scripts.sh", LOG)
  
-    LOG.LogEntry( "BootManager started at: %s" % \
-                  time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )
+    LOG.LogEntry("BootManager started at: {}"\
+                 .format(time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())))
  
      try:
          forceState = None
@@ -329,36 +398,36 @@ def main(argv):
              if BootManager.NodeRunStates.has_key(fState):
                  forceState = fState
              else:
-                LOG.LogEntry("FATAL: cannot force node run state to=%s" % fState)
+                LOG.LogEntry("FATAL: cannot force node run state to={}".format(fState))
                  error = 1
      except:
          traceback.print_exc(file=LOG.OutputFile)
          traceback.print_exc()
          
      if error:
-        LOG.LogEntry( "BootManager finished at: %s" % \
-                      time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )
+        LOG.LogEntry("BootManager finished at: {}"\
+                     .format(time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())))
          LOG.Upload()
          return error
  
      try:
-        bm= BootManager(LOG,forceState)
+        bm = BootManager(LOG, forceState)
          if bm.CAN_RUN == 0:
-            LOG.LogEntry( "Unable to initialize BootManager." )
+            LOG.LogEntry("Unable to initialize BootManager.")
          else:
-            LOG.LogEntry( "Running version %s of BootManager." % bm.VARS['VERSION'] )
-            success= bm.Run()
+            LOG.LogEntry("Running version {} of BootManager.".format(bm.VARS['VERSION']))
+            success = bm.Run()
              if success:
-                LOG.LogEntry( "\nDone!" );
+                LOG.LogEntry("\nDone!");
              else:
-                LOG.LogEntry( "\nError occurred!" );
+                LOG.LogEntry("\nError occurred!");
                  error = 1
      except:
          traceback.print_exc(file=LOG.OutputFile)
          traceback.print_exc()
  
-    LOG.LogEntry( "BootManager finished at: %s" % \
-                  time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )
+    LOG.LogEntry("BootManager finished at: {}"\
+                 .format(time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())))
      LOG.Upload()
  
      return error