# Attempt to reboot a node in debug state.
-import plc
+from monitor import const
+from monitor.database.info.model import *
+from monitor.wrapper import plc
api = plc.getAuthAPI()
import sys
import os
-import const
from getsshkeys import SSHKnownHosts
import subprocess
import time
-import database
-import moncommands
+from monitor.util import command as moncommands
from sets import Set
-import ssh.pxssh as pxssh
-import ssh.fdpexpect as fdpexpect
-import ssh.pexpect as pexpect
-from unified_model import *
-from emailTxt import mailtxt
+from pcucontrol.transports.ssh import pxssh as pxssh
+from pcucontrol.transports.ssh import fdpexpect as fdpexpect
+from pcucontrol.transports.ssh import pexpect as pexpect
+from monitor.model import *
+from monitor.wrapper.emailTxt import mailtxt
from nodeconfig import network_config_to_str
import traceback
-import config
+from monitor import config
import signal
class Sopen(subprocess.Popen):
from Rpyc.Utils import *
fb = None
-def get_fbnode(node):
- global fb
- if fb is None:
- fb = database.dbLoad("findbad")
- fbnode = fb['nodes'][node]['values']
- return fbnode
-
class NodeConnection:
def __init__(self, connection, node, config):
self.node = node
# NOTE: Nothing works if the bootcd is REALLY old.
# So, this is the first step.
- fbnode = get_fbnode(hostname)
+ fbnode = FindbadNodeRecord.get_latest_by(hostname=hostname).to_dict()
if fbnode['category'] == "OLDBOOTCD":
print "...NOTIFY OWNER TO UPDATE BOOTCD!!!"
args = {}
mailtxt.newbootcd_one[1] % args, True, db='bootcd_persistmessages')
loginbase = plc.siteId(hostname)
- m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase])
+ emails = plc.getTechEmails(loginbase)
+ m.send(emails)
print "\tDisabling %s due to out-of-date BOOTCD" % hostname
api.UpdateNode(hostname, {'boot_state' : 'disable'})
try:
k = SSHKnownHosts(); k.update(node); k.write(); del k
except:
+ from monitor.common import email_exception
+ email_exception()
print traceback.print_exc()
return False
else:
session = PlanetLabSession(node, config.nosetup, config.verbose)
except Exception, e:
- print "ERROR setting up session for %s" % hostname
+ msg = "ERROR setting up session for %s" % hostname
+ print msg
print traceback.print_exc()
+ from monitor.common import email_exception
+ email_exception(msg)
print e
return False
conn = session.get_connection(config)
except:
print traceback.print_exc()
+ from monitor.common import email_exception
+ email_exception()
return False
if forced_action == "reboot":
mailtxt.baddisk[1] % args, True, db='hardware_persistmessages')
loginbase = plc.siteId(hostname)
- m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase])
+ emails = plc.getTechEmails(loginbase)
+ m.send(emails)
conn.set_nodestate('disable')
return False
"bminit-cfg-auth-getplc-update-hardware-installinit-installdisk-installbootfs-installcfg-installstop-update-installinit-validate-rebuildinitrd-netcfg-disk-update4-update3-update3-kernelcopyfail-exception-update-debug-done",
"bminit-cfg-auth-getplc-hardware-installinit-installdisk-installbootfs-installcfg-installstop-update-installinit-validate-rebuildinitrd-netcfg-disk-update4-update3-update3-kernelcopyfail-exception-update-debug-done",
"bminit-cfg-auth-getplc-installinit-validate-exception-noinstall-update-debug-done",
+ # actual solution appears to involve removing the bad files, and
+ # continually trying to boot the node.
+ "bminit-cfg-auth-getplc-update-installinit-validate-rebuildinitrd-netcfg-disk-update4-update3-update3-implementerror-update-debug-done",
]:
sequences.update({n : "restart_bootmanager_rins"})
"bminit-cfg-auth-implementerror-bootcheckfail-update-implementerror-bootupdatefail-done",
"bminit-cfg-auth-getplc-update-installinit-validate-rebuildinitrd-netcfg-update3-implementerror-nospace-update-debug-done",
"bminit-cfg-auth-getplc-hardware-installinit-installdisk-installbootfs-exception-downloadfail-update-debug-done",
+ "bminit-cfg-auth-getplc-update-installinit-validate-implementerror-update-debug-done",
]:
sequences.update({n: "restart_node_boot"})
m = PersistMessage(hostname, mailtxt.unknownsequence[0] % args,
mailtxt.unknownsequence[1] % args, False, db='unknown_persistmessages')
m.reset()
- m.send(['monitor-list@lists.planet-lab.org'])
+ m.send([config.cc_email])
conn.restart_bootmanager('boot')
m = PersistMessage(hostname, "Suspicous error from BootManager on %s" % args,
mailtxt.unknownsequence[1] % args, False, db='suspect_persistmessages')
m.reset()
- m.send(['monitor-list@lists.planet-lab.org'])
+ m.send([config.cc_email])
conn.restart_bootmanager('boot')
m = PersistMessage(hostname, mailtxt.plnode_cfg[0] % args, mailtxt.plnode_cfg[1] % args,
True, db='nodeid_persistmessages')
loginbase = plc.siteId(hostname)
- m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase])
+ emails = plc.getTechEmails(loginbase)
+ m.send(emails)
conn.dump_plconf_file()
conn.set_nodestate('disable')
args = {}
args['hostname'] = hostname
args['bmlog'] = conn.get_bootmanager_log().read()
- m = PersistMessage(hostname, mailtxt.plnode_network[0] % args, mailtxt.plnode_cfg[1] % args,
+ m = PersistMessage(hostname, mailtxt.plnode_cfg[0] % args, mailtxt.plnode_cfg[1] % args,
True, db='nodenet_persistmessages')
loginbase = plc.siteId(hostname)
- m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase])
+ emails = plc.getTechEmails(loginbase)
+ m.send(emails)
conn.dump_plconf_file()
conn.set_nodestate('disable')
mailtxt.newalphacd_one[1] % args, True, db='bootcd_persistmessages')
loginbase = plc.siteId(hostname)
- m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase])
+ emails = plc.getTechEmails(loginbase)
+ m.send(emails)
print "\tDisabling %s due to out-of-date BOOTCD" % hostname
conn.set_nodestate('disable')
mailtxt.baddisk[1] % args, True, db='hardware_persistmessages')
loginbase = plc.siteId(hostname)
- m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase])
+ emails = plc.getTechEmails(loginbase)
+ m.send(emails)
conn.set_nodestate('disable')
elif sequences[s] == "update_hardware_email":
mailtxt.minimalhardware[1] % args, True, db='minhardware_persistmessages')
loginbase = plc.siteId(hostname)
- m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase])
+ emails = plc.getTechEmails(loginbase)
+ m.send(emails)
conn.set_nodestate('disable')
elif sequences[s] == "bad_dns_email":
node = api.GetNodes(hostname)[0]
net = api.GetNodeNetworks(node['nodenetwork_ids'])[0]
except:
+ from monitor.common import email_exception
+ email_exception()
print traceback.print_exc()
# TODO: API error. Skip the email because not all of the required info is
# available; flag_set will not be recorded.
mailtxt.baddns[1] % args, True, db='baddns_persistmessages')
loginbase = plc.siteId(hostname)
- m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase])
+ emails = plc.getTechEmails(loginbase)
+ m.send(emails)
conn.set_nodestate('disable')
if flag_set:
# MAIN -------------------------------------------------------------------
def main():
- import parser as parsermodule
+ from monitor import parser as parsermodule
parser = parsermodule.getParser()
parser.set_defaults(child=False, collect=False, nosetup=False, verbose=False,