From: Aaron Klingaman Date: Tue, 31 Jan 2006 21:19:03 +0000 (+0000) Subject: implement two changes to better handle, identify, and recover from X-Git-Tag: planetlab-3_3-branch-point~13 X-Git-Url: http://git.onelab.eu/?p=bootmanager.git;a=commitdiff_plain;h=d4e6bea456b74aec211e7df9757cfd49a815f2b5 implement two changes to better handle, identify, and recover from the situation where no network driver is detected. this should prevent machines from installing, and booting, only to become unresponsive because no network driver is loaded. the two changes are: 1. write out modprobe.conf at each boot, instead of only after install 2. if no network devices are written to modprobe.conf during boot, send a message to the tech contacts about the missing network driver, and put the node into the debug state. --- diff --git a/source/configuration b/source/configuration index 0c02805..5684fcf 100644 --- a/source/configuration +++ b/source/configuration @@ -4,7 +4,7 @@ # the current version of the bootmanager -VERSION=3.1.14 +VERSION=3.1.15 # full url to which api server to contact diff --git a/source/notify_messages.py b/source/notify_messages.py index 71df874..f6bfb29 100644 --- a/source/notify_messages.py +++ b/source/notify_messages.py @@ -10,3 +10,4 @@ MSG_NO_NODE_CONFIG_FILE= "noconfig" MSG_AUTH_FAIL= "authfail" MSG_NODE_NOT_INSTALLED= "notinstalled" MSG_HOSTNAME_NOT_RESOLVE= "hostnamenotresolve" +MSG_NO_DETECTED_NETWORK= "nodetectednetwork" diff --git a/source/steps/ChainBootNode.py b/source/steps/ChainBootNode.py index 9695a8f..d9e7226 100644 --- a/source/steps/ChainBootNode.py +++ b/source/steps/ChainBootNode.py @@ -1,17 +1,22 @@ import string import re +import InstallWriteConfig +import UpdateBootStateWithPLC from Exceptions import * import utils import compatibility from systeminfo import systeminfo import BootAPI +import notify_messages def Run( vars, log ): """ Load the kernel off of a node and boot to it. This step assumes the disks are mounted on SYSIMG_PATH. 
+ If successful, this function will not return. If it returns, no chain + booting has occurred. Expect the following variables: BOOT_CD_VERSION A tuple of the current bootcd version @@ -109,6 +114,11 @@ def Run( vars, log ): update_vals['ssh_host_key']= ssh_host_key BootAPI.call_api_function( vars, "BootUpdateNode", (update_vals,) ) + # rewrite modprobe.conf in case there were any module changes + # from a new kernel installed. + log.write( "Rewriting /etc/modprobe.conf\n" ) + (network_count,storage_count)= \ + InstallWriteConfig.write_modprobeconf_file( vars, log ) log.write( "Copying kernel and initrd for booting.\n" ) utils.sysexec( "cp %s/boot/kernel-boot /tmp/kernel" % SYSIMG_PATH, log ) @@ -123,6 +133,20 @@ def Run( vars, log ): ROOT_MOUNTED= 0 vars['ROOT_MOUNTED']= 0 + # before we do the real kexec, check to see if we had any + # network drivers written to modprobe.conf. if not, switch this + # node to the debug state and return without chain booting. + if network_count == 0: + log.write( "\nIt appears we don't have any network drivers. Aborting.\n" ) + + vars['BOOT_STATE']= 'dbg' + vars['STATE_CHANGE_NOTIFY']= 1 + vars['STATE_CHANGE_NOTIFY_MESSAGE']= \ + notify_messages.MSG_NO_DETECTED_NETWORK + UpdateBootStateWithPLC.Run( vars, log ) + + return + log.write( "Unloading modules and chain booting to new kernel.\n" ) # further use of log after Upload will only output to screen diff --git a/source/steps/InstallWriteConfig.py b/source/steps/InstallWriteConfig.py index b9fd28f..f7e164f 100644 --- a/source/steps/InstallWriteConfig.py +++ b/source/steps/InstallWriteConfig.py @@ -153,42 +153,8 @@ def Run( vars, log ): # if the network modules are activated in a different order that the # boot cd. 
log.write( "Writing /etc/modprobe.conf\n" ) - - # get the kernel version - initrd= os.readlink( "%s/boot/initrd-boot" % SYSIMG_PATH ) - kernel_version= initrd.replace("initrd-", "").replace(".img", "") - - sysinfo= systeminfo() - sysmods= sysinfo.get_system_modules(SYSIMG_PATH, kernel_version) - if sysmods is None: - raise BootManagerException, "Unable to get list of system modules." - - eth_count= 0 - scsi_count= 0 - - modulesconf_file= file("%s/etc/modprobe.conf" % SYSIMG_PATH, "w" ) - - for type in sysmods: - if type == sysinfo.MODULE_CLASS_SCSI: - for a_mod in sysmods[type]: - if scsi_count == 0: - modulesconf_file.write( "alias scsi_hostadapter %s\n" % - a_mod ) - else: - modulesconf_file.write( "alias scsi_hostadapter%d %s\n" % - (scsi_count,a_mod) ) - scsi_count= scsi_count + 1 - - elif type == sysinfo.MODULE_CLASS_NETWORK: - for a_mod in sysmods[type]: - modulesconf_file.write( "alias eth%d %s\n" % - (eth_count,a_mod) ) - eth_count= eth_count + 1 - - modulesconf_file.close() - modulesconf_file= None - - + write_modprobeconf_file( vars, log ) + # dump the modprobe.conf file to the log (not to screen) log.write( "Contents of new modprobe.conf file:\n" ) modulesconf_file= file("%s/etc/modprobe.conf" % SYSIMG_PATH, "r" ) @@ -260,7 +226,8 @@ def Run( vars, log ): rootdev= file( "%s/%s" % (SYSIMG_PATH,PARTITIONS["mapper-root"]), "w" ) rootdev.close() - # initrd set above + initrd= os.readlink( "%s/boot/initrd-boot" % SYSIMG_PATH ) + kernel_version= initrd.replace("initrd-", "").replace(".img", "") utils.removefile( "%s/boot/%s" % (SYSIMG_PATH, initrd) ) utils.sysexec( "chroot %s mkinitrd /boot/initrd-%s.img %s" % \ (SYSIMG_PATH, kernel_version, kernel_version), log ) @@ -418,3 +385,64 @@ def write_network_configuration( vars, log ): network_file.close() network_file= None + + +def write_modprobeconf_file( vars, log ): + """ + write out the system file /etc/modprobe.conf with the current + set of modules. 
+ + returns a tuple of the number of network driver lines and storage + driver lines written as (networkcount,storagecount) + """ + + # make sure we have this class loaded + from systeminfo import systeminfo + + try: + SYSIMG_PATH= vars["SYSIMG_PATH"] + if SYSIMG_PATH == "": + raise ValueError, "SYSIMG_PATH" + + except KeyError, var: + raise BootManagerException, "Missing variable in vars: %s\n" % var + except ValueError, var: + raise BootManagerException, "Variable in vars, shouldn't be: %s\n" % var + + + # get the kernel version + initrd= os.readlink( "%s/boot/initrd-boot" % SYSIMG_PATH ) + kernel_version= initrd.replace("initrd-", "").replace(".img", "") + + sysinfo= systeminfo() + sysmods= sysinfo.get_system_modules(SYSIMG_PATH, kernel_version) + if sysmods is None: + raise BootManagerException, "Unable to get list of system modules." + + eth_count= 0 + scsi_count= 0 + + modulesconf_file= file("%s/etc/modprobe.conf" % SYSIMG_PATH, "w" ) + + for type in sysmods: + if type == sysinfo.MODULE_CLASS_SCSI: + for a_mod in sysmods[type]: + if scsi_count == 0: + modulesconf_file.write( "alias scsi_hostadapter %s\n" % + a_mod ) + else: + modulesconf_file.write( "alias scsi_hostadapter%d %s\n" % + (scsi_count,a_mod) ) + scsi_count= scsi_count + 1 + + elif type == sysinfo.MODULE_CLASS_NETWORK: + for a_mod in sysmods[type]: + modulesconf_file.write( "alias eth%d %s\n" % + (eth_count,a_mod) ) + eth_count= eth_count + 1 + + modulesconf_file.close() + modulesconf_file= None + + return (eth_count,scsi_count) +