X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=bootman.py;h=e7a47c3a90aa94cfe248a93df438c738b12d97fc;hb=b548c69db3d1f302b4d0d08377f0231eb3c4fd58;hp=a278afecae6a900590b0ad9706611db9cc1b19b5;hpb=944d143a6528c4157b71f51ed480aec806cbaa06;p=monitor.git diff --git a/bootman.py b/bootman.py index a278afe..e7a47c3 100755 --- a/bootman.py +++ b/bootman.py @@ -24,7 +24,7 @@ from unified_model import * from emailTxt import mailtxt from nodeconfig import network_config_to_str import traceback -import monitorconfig +import config import signal class Sopen(subprocess.Popen): @@ -34,9 +34,12 @@ class Sopen(subprocess.Popen): #from Rpyc import SocketConnection, Async from Rpyc import SocketConnection, Async from Rpyc.Utils import * +fb = None def get_fbnode(node): - fb = database.dbLoad("findbad") + global fb + if fb is None: + fb = database.dbLoad("findbad") fbnode = fb['nodes'][node]['values'] return fbnode @@ -204,7 +207,7 @@ class PlanetLabSession: args['port'] = self.port args['user'] = 'root' args['hostname'] = self.node - args['monitordir'] = monitorconfig.MONITOR_SCRIPT_ROOT + args['monitordir'] = config.MONITOR_SCRIPT_ROOT ssh_port = 22 if self.nosetup: @@ -359,7 +362,6 @@ def reboot(hostname, config=None, forced_action=None): except: print traceback.print_exc() return False - if forced_action == "reboot": conn.restart_node('rins') @@ -503,6 +505,7 @@ def reboot(hostname, config=None, forced_action=None): ('nodehostname' , 'Configured node hostname does not resolve'), ('implementerror', 'Implementation Error'), ('readonlyfs' , '[Errno 30] Read-only file system'), + ('baddisk' , "IOError: [Errno 13] Permission denied: '/tmp/mnt/sysimg//vservers/\w+/etc/hosts'"), ('noinstall' , 'notinstalled'), ('bziperror' , 'bzip2: Data integrity error when decompressing.'), ('noblockdev' , "No block devices detected."), @@ -512,6 +515,7 @@ def reboot(hostname, config=None, forced_action=None): ('hardwarerequirefail' , 'Hardware requirements not met'), ('mkfsfail' , 'while running: Running mkfs.ext2 -q -m 0 -j /dev/planetlab/vservers failed'), ('nofilereference', "No such file or directory: '/tmp/mnt/sysimg//vservers/.vref/planetlab-f8-i386/etc/hosts'"), + ('kernelcopyfail', "cp: cannot stat `/tmp/mnt/sysimg/boot/kernel-boot': No such file or directory"), ('chrootfail' , 'Running chroot /tmp/mnt/sysimg'), ('modulefail' , 'Unable to get list of system modules'), ('writeerror' , 'write error: No space left on device'), @@ -539,11 +543,11 @@ def reboot(hostname, config=None, forced_action=None): # By using the sequence identifier, we guarantee that there will be no # frequent loops. I'm guessing there is a better way to track loops, # though. - if not config.force and pflags.getRecentFlag(s): - pflags.setRecentFlag(s) - pflags.save() - print "... flag is set or it has already run recently. Skipping %s" % node - return True + #if not config.force and pflags.getRecentFlag(s): + # pflags.setRecentFlag(s) + # pflags.save() + # print "... flag is set or it has already run recently. Skipping %s" % node + # return True sequences = {} @@ -581,6 +585,7 @@ def reboot(hostname, config=None, forced_action=None): "bminit-cfg-auth-getplc-update-installinit-validate-rebuildinitrd-netcfg-update3-implementerror-nofilereference-update-debug-done", "bminit-cfg-auth-getplc-update-hardware-installinit-installdisk-exception-mkfsfail-update-debug-done", "bminit-cfg-auth-getplc-installinit-validate-rebuildinitrd-exception-chrootfail-update-debug-done", + "bminit-cfg-auth-getplc-update-installinit-validate-rebuildinitrd-netcfg-disk-update4-exception-chrootfail-update-debug-done", "bminit-cfg-auth-getplc-installinit-validate-exception-noinstall-update-debug-done", ]: sequences.update({n : "restart_bootmanager_rins"})