X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=bootman.py;h=640f9ee0dba6570bfd199ef22e9af3420dd62a65;hb=9b8db783f3d465c4652b806395d5c0873b498cbd;hp=e8dc7b89dae9d55d71115a520c8587e7a980ec5c;hpb=90b2e8e7cb145cb1f6b3780867617084441b6ca9;p=monitor.git diff --git a/bootman.py b/bootman.py index e8dc7b8..640f9ee 100755 --- a/bootman.py +++ b/bootman.py @@ -2,29 +2,29 @@ # Attempt to reboot a node in debug state. -import plc +from monitor import const +from monitor.database.info.model import * +from monitor.wrapper import plc api = plc.getAuthAPI() import sys import os -import const from getsshkeys import SSHKnownHosts import subprocess import time -import database -import moncommands +from pcucontrol.util import command as moncommands from sets import Set -import ssh.pxssh as pxssh -import ssh.fdpexpect as fdpexpect -import ssh.pexpect as pexpect -from unified_model import * -from emailTxt import mailtxt +from pcucontrol.transports.ssh import pxssh as pxssh +from pcucontrol.transports.ssh import fdpexpect as fdpexpect +from pcucontrol.transports.ssh import pexpect as pexpect +from monitor.model import * +from monitor.wrapper.emailTxt import mailtxt from nodeconfig import network_config_to_str import traceback -import config +from monitor import config import signal class Sopen(subprocess.Popen): @@ -317,7 +317,8 @@ def reboot(hostname, config=None, forced_action=None): mailtxt.newbootcd_one[1] % args, True, db='bootcd_persistmessages') loginbase = plc.siteId(hostname) - m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase]) + emails = plc.getTechEmails(loginbase) + m.send(emails) print "\tDisabling %s due to out-of-date BOOTCD" % hostname api.UpdateNode(hostname, {'boot_state' : 'disable'}) @@ -330,6 +331,8 @@ def reboot(hostname, config=None, forced_action=None): try: k = SSHKnownHosts(); k.update(node); k.write(); del k except: + from monitor.common import email_exception + email_exception() print traceback.print_exc() return False @@ -339,8 +342,11 @@ def reboot(hostname, config=None, forced_action=None): else: session = PlanetLabSession(node, config.nosetup, config.verbose) except Exception, e: - print "ERROR setting up session for %s" % hostname + msg = "ERROR setting up session for %s" % hostname + print msg print traceback.print_exc() + from monitor.common import email_exception + email_exception(msg) print e return False @@ -354,6 +360,8 @@ def reboot(hostname, config=None, forced_action=None): conn = session.get_connection(config) except: print traceback.print_exc() + from monitor.common import email_exception + email_exception() return False if forced_action == "reboot": @@ -448,7 +456,8 @@ def reboot(hostname, config=None, forced_action=None): mailtxt.baddisk[1] % args, True, db='hardware_persistmessages') loginbase = plc.siteId(hostname) - m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase]) + emails = plc.getTechEmails(loginbase) + m.send(emails) conn.set_nodestate('disable') return False @@ -582,6 +591,9 @@ def reboot(hostname, config=None, forced_action=None): "bminit-cfg-auth-getplc-update-hardware-installinit-installdisk-installbootfs-installcfg-installstop-update-installinit-validate-rebuildinitrd-netcfg-disk-update4-update3-update3-kernelcopyfail-exception-update-debug-done", "bminit-cfg-auth-getplc-hardware-installinit-installdisk-installbootfs-installcfg-installstop-update-installinit-validate-rebuildinitrd-netcfg-disk-update4-update3-update3-kernelcopyfail-exception-update-debug-done", "bminit-cfg-auth-getplc-installinit-validate-exception-noinstall-update-debug-done", + # actual solution appears to involve removing the bad files, and + # continually trying to boot the node. + "bminit-cfg-auth-getplc-update-installinit-validate-rebuildinitrd-netcfg-disk-update4-update3-update3-implementerror-update-debug-done", ]: sequences.update({n : "restart_bootmanager_rins"}) @@ -610,6 +622,7 @@ def reboot(hostname, config=None, forced_action=None): "bminit-cfg-auth-implementerror-bootcheckfail-update-implementerror-bootupdatefail-done", "bminit-cfg-auth-getplc-update-installinit-validate-rebuildinitrd-netcfg-update3-implementerror-nospace-update-debug-done", "bminit-cfg-auth-getplc-hardware-installinit-installdisk-installbootfs-exception-downloadfail-update-debug-done", + "bminit-cfg-auth-getplc-update-installinit-validate-implementerror-update-debug-done", ]: sequences.update({n: "restart_node_boot"}) @@ -666,7 +679,7 @@ def reboot(hostname, config=None, forced_action=None): m = PersistMessage(hostname, mailtxt.unknownsequence[0] % args, mailtxt.unknownsequence[1] % args, False, db='unknown_persistmessages') m.reset() - m.send(['monitor-list@lists.planet-lab.org']) + m.send([config.cc_email]) conn.restart_bootmanager('boot') @@ -704,7 +717,7 @@ def reboot(hostname, config=None, forced_action=None): m = PersistMessage(hostname, "Suspicous error from BootManager on %s" % args, mailtxt.unknownsequence[1] % args, False, db='suspect_persistmessages') m.reset() - m.send(['monitor-list@lists.planet-lab.org']) + m.send([config.cc_email]) conn.restart_bootmanager('boot') @@ -715,7 +728,8 @@ def reboot(hostname, config=None, forced_action=None): m = PersistMessage(hostname, mailtxt.plnode_cfg[0] % args, mailtxt.plnode_cfg[1] % args, True, db='nodeid_persistmessages') loginbase = plc.siteId(hostname) - m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase]) + emails = plc.getTechEmails(loginbase) + m.send(emails) conn.dump_plconf_file() conn.set_nodestate('disable') @@ -724,10 +738,11 @@ def reboot(hostname, config=None, forced_action=None): args = {} args['hostname'] = hostname args['bmlog'] = conn.get_bootmanager_log().read() - m = PersistMessage(hostname, mailtxt.plnode_network[0] % args, mailtxt.plnode_cfg[1] % args, + m = PersistMessage(hostname, mailtxt.plnode_cfg[0] % args, mailtxt.plnode_cfg[1] % args, True, db='nodenet_persistmessages') loginbase = plc.siteId(hostname) - m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase]) + emails = plc.getTechEmails(loginbase) + m.send(emails) conn.dump_plconf_file() conn.set_nodestate('disable') @@ -742,7 +757,8 @@ def reboot(hostname, config=None, forced_action=None): mailtxt.newalphacd_one[1] % args, True, db='bootcd_persistmessages') loginbase = plc.siteId(hostname) - m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase]) + emails = plc.getTechEmails(loginbase) + m.send(emails) print "\tDisabling %s due to out-of-date BOOTCD" % hostname conn.set_nodestate('disable') @@ -760,7 +776,8 @@ def reboot(hostname, config=None, forced_action=None): mailtxt.baddisk[1] % args, True, db='hardware_persistmessages') loginbase = plc.siteId(hostname) - m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase]) + emails = plc.getTechEmails(loginbase) + m.send(emails) conn.set_nodestate('disable') elif sequences[s] == "update_hardware_email": @@ -772,7 +789,8 @@ def reboot(hostname, config=None, forced_action=None): mailtxt.minimalhardware[1] % args, True, db='minhardware_persistmessages') loginbase = plc.siteId(hostname) - m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase]) + emails = plc.getTechEmails(loginbase) + m.send(emails) conn.set_nodestate('disable') elif sequences[s] == "bad_dns_email": @@ -782,6 +800,8 @@ def reboot(hostname, config=None, forced_action=None): node = api.GetNodes(hostname)[0] net = api.GetNodeNetworks(node['nodenetwork_ids'])[0] except: + from monitor.common import email_exception + email_exception() print traceback.print_exc() # TODO: api error. skip email, b/c all info is not available, # flag_set will not be recorded. @@ -795,7 +815,8 @@ def reboot(hostname, config=None, forced_action=None): mailtxt.baddns[1] % args, True, db='baddns_persistmessages') loginbase = plc.siteId(hostname) - m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase]) + emails = plc.getTechEmails(loginbase) + m.send(emails) conn.set_nodestate('disable') if flag_set: @@ -808,7 +829,7 @@ def reboot(hostname, config=None, forced_action=None): # MAIN ------------------------------------------------------------------- def main(): - import parser as parsermodule + from monitor import parser as parsermodule parser = parsermodule.getParser() parser.set_defaults(child=False, collect=False, nosetup=False, verbose=False,