import ssh.pxssh as pxssh
import ssh.fdpexpect as fdpexpect
import ssh.pexpect as pexpect
-from unified_model import *
+from monitor.model import *
from emailTxt import mailtxt
from nodeconfig import network_config_to_str
import traceback
-import monitorconfig
+import config
import signal
class Sopen(subprocess.Popen):
#from Rpyc import SocketConnection, Async
from Rpyc import SocketConnection, Async
from Rpyc.Utils import *
-
-def get_fbnode(node):
- fb = database.dbLoad("findbad")
- fbnode = fb['nodes'][node]['values']
- return fbnode
+fb = None
class NodeConnection:
def __init__(self, connection, node, config):
args['port'] = self.port
args['user'] = 'root'
args['hostname'] = self.node
- args['monitordir'] = monitorconfig.MONITOR_SCRIPT_ROOT
+ args['monitordir'] = config.MONITOR_SCRIPT_ROOT
ssh_port = 22
if self.nosetup:
# NOTE: Nothing works if the bootcd is REALLY old.
# So, this is the first step.
- fbnode = get_fbnode(hostname)
+ fbnode = FindbadNodeRecord.get_latest_by(hostname=hostname).to_dict()
if fbnode['category'] == "OLDBOOTCD":
print "...NOTIFY OWNER TO UPDATE BOOTCD!!!"
args = {}
except:
print traceback.print_exc()
return False
-
if forced_action == "reboot":
conn.restart_node('rins')
('nodehostname' , 'Configured node hostname does not resolve'),
('implementerror', 'Implementation Error'),
('readonlyfs' , '[Errno 30] Read-only file system'),
+ ('baddisk' , "IOError: [Errno 13] Permission denied: '/tmp/mnt/sysimg//vservers/\w+/etc/hosts'"),
('noinstall' , 'notinstalled'),
('bziperror' , 'bzip2: Data integrity error when decompressing.'),
('noblockdev' , "No block devices detected."),
('hardwarerequirefail' , 'Hardware requirements not met'),
('mkfsfail' , 'while running: Running mkfs.ext2 -q -m 0 -j /dev/planetlab/vservers failed'),
('nofilereference', "No such file or directory: '/tmp/mnt/sysimg//vservers/.vref/planetlab-f8-i386/etc/hosts'"),
+ ('kernelcopyfail', "cp: cannot stat `/tmp/mnt/sysimg/boot/kernel-boot': No such file or directory"),
('chrootfail' , 'Running chroot /tmp/mnt/sysimg'),
('modulefail' , 'Unable to get list of system modules'),
('writeerror' , 'write error: No space left on device'),
# By using the sequence identifier, we guarantee that there will be no
# frequent loops. I'm guessing there is a better way to track loops,
# though.
- if not config.force and pflags.getRecentFlag(s):
- pflags.setRecentFlag(s)
- pflags.save()
- print "... flag is set or it has already run recently. Skipping %s" % node
- return True
+ #if not config.force and pflags.getRecentFlag(s):
+ # pflags.setRecentFlag(s)
+ # pflags.save()
+ # print "... flag is set or it has already run recently. Skipping %s" % node
+ # return True
sequences = {}
"bminit-cfg-auth-getplc-update-installinit-validate-rebuildinitrd-netcfg-update3-implementerror-nofilereference-update-debug-done",
"bminit-cfg-auth-getplc-update-hardware-installinit-installdisk-exception-mkfsfail-update-debug-done",
"bminit-cfg-auth-getplc-installinit-validate-rebuildinitrd-exception-chrootfail-update-debug-done",
+ "bminit-cfg-auth-getplc-update-installinit-validate-rebuildinitrd-netcfg-disk-update4-exception-chrootfail-update-debug-done",
+ "bminit-cfg-auth-getplc-update-hardware-installinit-installdisk-installbootfs-installcfg-installstop-update-installinit-validate-rebuildinitrd-netcfg-disk-update4-update3-update3-kernelcopyfail-exception-update-debug-done",
+ "bminit-cfg-auth-getplc-hardware-installinit-installdisk-installbootfs-installcfg-installstop-update-installinit-validate-rebuildinitrd-netcfg-disk-update4-update3-update3-kernelcopyfail-exception-update-debug-done",
"bminit-cfg-auth-getplc-installinit-validate-exception-noinstall-update-debug-done",
+ # The actual solution appears to involve removing the bad files and
+ # then repeatedly trying to boot the node.
+ "bminit-cfg-auth-getplc-update-installinit-validate-rebuildinitrd-netcfg-disk-update4-update3-update3-implementerror-update-debug-done",
]:
sequences.update({n : "restart_bootmanager_rins"})