args = {}
args['hostname'] = hostname
args['bmlog'] = conn.get_bootmanager_log().read()
- m = PersistMessage(hostname, mailtxt.plnode_network[0] % args, mailtxt.plnode_cfg[1] % args,
+ m = PersistMessage(hostname, mailtxt.plnode_cfg[0] % args, mailtxt.plnode_cfg[1] % args,
True, db='nodenet_persistmessages')
loginbase = plc.siteId(hostname)
emails = plc.getTechEmails(loginbase)
pcuid=None,
pcuselect=None,
site=None,
+ sitelist=None,
dbname="findbadpcus",
cachenodes=False,
cachecalls=True,
help="Provide the input file for the node list")
parser.add_option("", "--site", dest="site", metavar="FILE",
help="Get all pcus associated with the given site's nodes")
+ parser.add_option("", "--sitelist", dest="sitelist", metavar="FILE",
+ help="Get all pcus associated with the given site's nodes")
parser.add_option("", "--pcuselect", dest="pcuselect", metavar="FILE",
help="Query string to apply to the findbad pcus")
parser.add_option("", "--pcuid", dest="pcuid", metavar="id",
args = {}
args['known_hosts'] = os.environ['HOME'] + os.sep + ".ssh" + os.sep + "known_hosts"
try:
- import config
+ from monitor import config
args['XMLRPC_SERVER'] = config.API_SERVER
except:
args['XMLRPC_SERVER'] = 'https://boot.planet-lab.org/PLCAPI/'
from monitor.wrapper import plc, plccache
from datetime import datetime
-from monitor.model import PersistFlags
+from monitor.model import PersistFlags, Message
esc = struct.pack('i', 27)
RED = esc + "[1;31m"
l_nodes = node_select(config.nodeselect, node_list, None)
return l_nodes
-
+
def email_exception():
    """Mail the traceback of the exception currently being handled.

    The active exception is rendered with ``traceback.format_exc()``,
    wrapped in a ``Message`` whose subject is "exception running monitor",
    and sent to the single address ``config.cc_email``.

    Returns None.
    """
    import traceback
    from monitor import config

    trace_text = traceback.format_exc()
    report = Message("exception running monitor", trace_text, False)
    # The recipient list is built only after the Message exists, matching
    # the original order of side effects.
    report.send([config.cc_email])
else:
print "takeAction: increasing penalty for %s"%self.hostname
pp.increase()
+
+ print "takeAction: applying penalty to %s as index %s"% (self.hostname, index)
pp.index = index
pp.apply(self.hostname)
pp.save()
#### APPLY PENALTY
if ( record.data['take_action'] and diag['Squeeze'] ):
- print "action: taking action"
+ print "action: taking squeeze action"
record.takeAction(record.data['penalty_level'])
del diag['Squeeze']
if diag.getFlag('BackOff'):
+ print "action: taking backoff action"
record.takeAction(0)
del diag['BackOff']
###### DRY RUN ############################
- if 'node_ids' in values['plc_pcu_stats'] and \
+ if continue_probe and 'node_ids' in values['plc_pcu_stats'] and \
len(values['plc_pcu_stats']['node_ids']) > 0:
rb_ret = reboot.reboot_test_new(values['plc_pcu_stats']['nodenames'][0],
values, 1, True)
print "____________________________________"
errors['traceback'] = traceback.format_exc()
print errors['traceback']
- values['reboot_trial_status'] = errors['traceback']
+ values['reboot_trial_status'] = str(errors['traceback'])
+ print values
values['entry_complete']=" ".join(values['entry_complete'])
fbquery = FindbadNodeRecord.get_all_latest()
fb_nodelist = [ n.hostname for n in fbquery ]
if True:
+ # NOTE: this doesn't work when there are only a few records current.
+ # pcu_select should apply to all pcus globally, not just the most recent records.
fbpcuquery = FindbadPCURecord.get_all_latest()
fbpcu_list = [ p.plc_pcuid for p in fbpcuquery ]
def run(self, node_port, dryrun):
print "RUNNING!!!!!!!!!!!!"
- if self.type == Transport.HTTPS or self.type == Transport.HTTP:
+ if self.transport.type == Transport.HTTPS or self.type == Transport.HTTP:
print "APC via http...."
return self.run_http_or_https(node_port, dryrun)
else:
else:
# TODO: also send message for https, since that doesn't work this way...
- if self.type == Transport.HTTPS:
+ if self.transport.type == Transport.HTTPS:
cmd = self.get_https_cmd()
- elif self.type == Transport.HTTP:
+ elif self.transport.type == Transport.HTTP:
cmd = self.get_http_cmd()
else:
raise ExceptionNoTransport("Unsupported transport for http command")
# NOTE: we may need to return software version, not model version, to
# know which file to request on the server.
- if self.type == Transport.HTTP:
+ if self.transport.type == Transport.HTTP:
cmd = """curl -s --anyauth --user '%s:%s' http://%s/about.htm """ + \
""" | sed -e "s/<[^>]*>//g" -e "s/ //g" -e "/^$/d" """ + \
""" | grep -E "AP[[:digit:]]+" """
#""" | grep -E "v[[:digit:]].*" """
- elif self.type == Transport.HTTPS:
+ elif self.transport.type == Transport.HTTPS:
cmd = """curl -s --insecure --user '%s:%s' https://%s/about.htm """ + \
""" | sed -e "s/<[^>]*>//g" -e "s/ //g" -e "/^$/d" """ + \
""" | grep -E "AP[[:digit:]]+" """
def logout(self):
# NOTE: log out again, to allow other users to access the machine.
- if self.type == Transport.HTTP:
+ if self.transport.type == Transport.HTTP:
cmd = """curl -s --anyauth --user '%s:%s' http://%s/logout.htm """ + \
""" | grep -E '^[^<]+' """
- elif self.type == Transport.HTTPS:
+ elif self.transport.type == Transport.HTTPS:
cmd = """curl -s --insecure --user '%s:%s' http://%s/logout.htm """ + \
""" | grep -E '^[^<]+' """
else:
from pcucontrol.reboot import *
class BayTechRPC3NC(PCUControl):
+ supported_ports = [22,23]
def run_telnet(self, node_port, dryrun):
return self.run_ssh(node_port, dryrun)
return 0
class BayTechRPC16(PCUControl):
+ supported_ports = [22,23]
def run_telnet(self, node_port, dryrun):
return self.run_ssh(node_port, dryrun)
def run_ssh(self, node_port, dryrun):
indefinitely, unless you send a Ctrl-C after the password. No idea
why.
"""
+ supported_ports = [22]
def run_ssh(self, node_port, dryrun):
print "BayTechCtrlC %s" % self.host
if index == 0:
print "3"
s.send("3\r\n")
+ time.sleep(5)
index = s.expect(["DS-RPC>", "Enter user name:"])
if index == 1:
s.send(self.username + "\r\n")
+ time.sleep(5)
index = s.expect(["DS-RPC>"])
if index == 0:
indefinitely, unless you send a Ctrl-C after the password. No idea
why.
"""
+ supported_ports = [22]
def run_ssh(self, node_port, dryrun):
print "BayTechCtrlC %s" % self.host
original_prompts="Dell", login_timeout=Transport.TELNET_TIMEOUT):
raise ExceptionPassword("Invalid Password")
- print "logging in..."
+ print "logging in... %s" % self.host
s.send("\r\n\r\n")
try:
# Testing Reboot ?
logger.debug("runcmd raised exception %s" % err)
if verbose:
logger.debug(err)
- return err
+ return str(err)
req.add_header("Authorization", authheader)
# add data to handler,
f = urllib2.urlopen(req, data)
- if self.verbose: print f.read()
+ if self.transport.verbose: print f.read()
except:
import traceback; traceback.print_exc()
# fetch url one more time on cmd.html, econtrol.html or whatever.
# pass
else:
- if self.verbose: print f.read()
+ if self.transport.verbose: print f.read()
return 0
# NOTE: it doesn't seem to matter whether this authinfo is here or not.
transport = urllib2.build_opener(authinfo)
f = transport.open(self.url)
- if self.verbose: print f.read()
+ if self.transport.verbose: print f.read()
if not dryrun:
transport = urllib2.build_opener(authhandler)
f = transport.open(self.url + "cmd.html", "P%d=r" % node_port)
- if self.verbose: print f.read()
+ if self.transport.verbose: print f.read()
self.transport.close()
return 0
# NOTE: it doesn't seem to matter whether this authinfo is here or not.
transport = urllib2.build_opener()
f = transport.open(self.url + "elogin.html", "pwd=%s" % self.password)
- if self.verbose: print f.read()
+ if self.transport.verbose: print f.read()
if not dryrun:
transport = urllib2.build_opener(authhandler)
f = transport.open(self.url + "econtrol.html", "P%d=r" % node_port)
- if self.verbose: print f.read()
+ if self.transport.verbose: print f.read()
# data= "P%d=r" % node_port
#self.open(self.host, self.username, self.password)
transport.set_debuglevel(self.verbose)
if username is not None:
self.transport = transport
- self.transport.ifThenSend(prompt, username, ExceptionUsername)
+ self.ifThenSend(prompt, username, ExceptionUsername)
elif self.type == self.SSH:
if username is not None:
def reboot(self, node_port, dryrun):
port_list = []
+ # There are two sources of potential ports. Those that are open and
+ # those that are part of the PCU's supported_ports.
+ # I think we should start with supported_ports and then filter that
+ # by the open ports.
+
+ port_list = self.supported_ports
+
if hasattr(self, 'port_status') and self.port_status:
+ # get out the open ports
port_list = filter(lambda x: self.port_status[x] == "open" , self.port_status.keys())
port_list = [ int(x) for x in port_list ]
+ # take only the open ports that are supported_ports
+ port_list = filter(lambda x: x in self.supported_ports, port_list)
if port_list == []:
- raise ExceptionPort("Unsupported Port: No transport from open ports")
- else:
- port_list = self.supported_ports
+ raise ExceptionPort("No Open Port: No transport from open ports")
print port_list
- ret = "could not run"
+ ret = "No implementation for open ports on selected PCU model"
for port in port_list:
if port not in Transport.porttypemap:
continue
type = Transport.porttypemap[port]
self.transport = Transport(type, verbose)
+ print "checking for run_%s" % type
if hasattr(self, "run_%s" % type):
+ print "found run_%s" % type
fxn = getattr(self, "run_%s" % type)
ret = self.catcherror(fxn, node_port, dryrun)
if ret == 0: # NOTE: success!, so stop
except urllib2.URLError, err:
return "URLError: " + str(err)
except EOFError, err:
- if self.verbose:
- logger.debug("reboot: EOF")
- logger.debug(err)
self.transport.close()
import traceback
traceback.print_exc()
return rb_ret
# Old model strings mapped to their new model names.  A model string that is
# not listed here is passed through unchanged.
_MODEL_RENAMES = {
    'AP79xx': 'APCControl13p13',
    'Masterswitch': 'APCControl13p13',
    'DS4-RPC': 'BayTech',
    'IP-41x_IP-81x': 'IPAL',
    'DRAC3': 'DRAC',
    'DRAC4': 'DRAC',
    'ePowerSwitch': 'ePowerSwitchOld',
    'ilo2': 'HPiLO',
    'ilo1': 'HPiLO',
    'PM211-MIP': 'PM211MIP',
    'AMT2.5': 'IntelAMT',
    'AMT3.0': 'IntelAMT',
    'WTI_IPS-4': 'WTIIPS4',
    'unknown': 'ManualPCU',
    'DRAC5': 'DRAC',
    'ipmi': 'OpenIPMI',
    'bbsemaverick': 'BlackBoxPSMaverick',
    'manualadmin': 'ManualPCU',
}

# Per-PCU exceptions: (pcu ids, new model name).  Groups are scanned in order
# and the first group containing the pcu id wins, overriding the name-based
# mapping above.
_PCU_ID_OVERRIDES = (
    ((1102, 1163, 1055, 1111, 1231, 1113, 1127, 1128, 1148), 'APCControl12p3'),
    ((1110, 86), 'APCControl1p4'),
    ((1221, 1225, 1220, 1192), 'APCControl121p3'),
    ((1173, 1240, 47, 1363, 1405, 1401, 1372, 1371), 'APCControl121p1'),
    ((1056, 1237, 1052, 1209, 1002, 1008, 1013, 1022), 'BayTechCtrlC'),
    ((93,), 'BayTechRPC3NC'),
    ((1057,), 'BayTechCtrlCUnibe'),
    ((1012,), 'BayTechRPC16'),
    ((1089, 1071, 1046, 1035, 1118), 'ePowerSwitchNew'),
)


def convert_oldmodelname_to_newmodelname(oldmodelname, pcu_id):
    """Translate an old PCU model string into its new model name.

    Args:
        oldmodelname: the old model string; returned unchanged when it has
            no entry in the rename table.
        pcu_id: id of the PCU; a small set of ids is pinned to a specific
            model name regardless of ``oldmodelname``.

    Returns:
        The new model name: the per-id override when ``pcu_id`` is listed,
        otherwise the renamed model, otherwise ``oldmodelname`` itself.
    """
    # Name-based mapping first, falling back to the input unchanged.
    newmodelname = _MODEL_RENAMES.get(oldmodelname, oldmodelname)

    # Id-based overrides take precedence over whatever the name mapped to.
    for pcu_ids, override in _PCU_ID_OVERRIDES:
        if pcu_id in pcu_ids:
            newmodelname = override
            break

    return newmodelname
+
def reboot_test_new(nodename, values, verbose, dryrun):
rb_ret = ""
if 'plc_pcu_stats' in values:
values.update(values['plc_pcu_stats'])
try:
- modelname = values['model']
+ modelname = convert_oldmodelname_to_newmodelname(values['model'], values['pcu_id'])
if modelname:
- object = eval('%s(values, verbose, ["22", "23", "80", "443", "9100", "16992", "5869"])' % modelname)
+ object = eval('%s(values, verbose)' % modelname)
rb_ret = object.reboot(values[nodename], dryrun)
else:
rb_ret = "Not_Run"