From be11398f97d51c72f275df3d4f7e095e900f2add Mon Sep 17 00:00:00 2001 From: Stephen Soltesz <soltesz@cs.princeton.edu> Date: Wed, 11 Mar 2009 21:06:44 +0000 Subject: [PATCH] split reboot.py across pcucontrol and monitor modules. now pcucontrol is completely independent. improved the timeout function in util/command.py. allows script to hop past nodes that are really broken. minor edits to address reboot.py changes. --- findbadpcu.py | 1 - grouprins.py | 2 +- monitor/reboot.py | 433 +--------------------------------------- monitor/util/command.py | 102 ++++++---- nodeinfo.py | 2 +- nodequery.py | 3 - pcubad.py | 2 +- pcucontrol/reboot.py | 108 +--------- sitebad.py | 1 - siteinfo.py | 1 - 10 files changed, 70 insertions(+), 585 deletions(-) diff --git a/findbadpcu.py b/findbadpcu.py index 7e84513..d00d7f7 100755 --- a/findbadpcu.py +++ b/findbadpcu.py @@ -13,7 +13,6 @@ import threadpool import threading import monitor -from pcucontrol import reboot from monitor import config from monitor.database.info.model import FindbadPCURecordSync, FindbadPCURecord, session from monitor import database diff --git a/grouprins.py b/grouprins.py index 433ecd3..eb89e6a 100755 --- a/grouprins.py +++ b/grouprins.py @@ -17,7 +17,7 @@ from monitor import util from monitor import const from monitor import database from monitor import parser as parsermodule -from pcucontrol import reboot +from monitor import reboot from monitor.wrapper import plc api = plc.getAuthAPI() diff --git a/monitor/reboot.py b/monitor/reboot.py index c3f6103..3a8e8c5 100755 --- a/monitor/reboot.py +++ b/monitor/reboot.py @@ -17,8 +17,6 @@ import pcucontrol.transports.ssh.pxssh as pxssh import pcucontrol.transports.ssh.pexpect as pexpect import socket - - # Use our versions of telnetlib and pyssh sys.path.insert(0, os.path.dirname(sys.argv[0])) import pcucontrol.transports.telnetlib as telnetlib @@ -29,6 +27,8 @@ from monitor import config from monitor.util import command from monitor.wrapper import plc +from
pcucontrol.reboot import pcu_name, model_to_object, reboot_api, convert_oldmodelname_to_newmodelname, reboot_test_new + # Event class ID from pcu events #NODE_POWER_CONTROL = 3 @@ -41,312 +41,6 @@ logger = logging.getLogger("monitor") verbose = 1 #dryrun = 0; -class ExceptionNoTransport(Exception): pass -class ExceptionNotFound(Exception): pass -class ExceptionPassword(Exception): pass -class ExceptionTimeout(Exception): pass -class ExceptionPrompt(Exception): pass -class ExceptionSequence(Exception): pass -class ExceptionReset(Exception): pass -class ExceptionPort(Exception): pass -class ExceptionUsername(Exception): pass - - - -# PCU has model, host, preferred-port, user, passwd, - -# This is an object derived directly form the PLCAPI DB fields -class PCU(object): - def __init__(self, plc_pcu_dict): - for field in ['username', 'password', 'site_id', - 'hostname', 'ip', - 'pcu_id', 'model', - 'node_ids', 'ports', ]: - if field in plc_pcu_dict: - self.__setattr__(field, plc_pcu_dict[field]) - else: - raise Exception("No such field %s in PCU object" % field) - -# These are the convenience functions build around the PCU object. 
-class PCUModel(PCU): - def __init__(self, plc_pcu_dict): - PCU.__init__(self, plc_pcu_dict) - self.host = self.pcu_name() - - def pcu_name(self): - if self.hostname is not None and self.hostname is not "": - return self.hostname - elif self.ip is not None and self.ip is not "": - return self.ip - else: - return None - - def nodeidToPort(self, node_id): - if node_id in self.node_ids: - for i in range(0, len(self.node_ids)): - if node_id == self.node_ids[i]: - return self.ports[i] - - raise Exception("No such Node ID: %d" % node_id) - -# This class captures the observed pcu records from FindBadPCUs.py -class PCURecord: - def __init__(self, pcu_record_dict): - for field in ['port_status', - 'dns_status', - 'entry_complete', ]: - if field in pcu_record_dict: - if field == "reboot": - self.__setattr__("reboot_str", pcu_record_dict[field]) - else: - self.__setattr__(field, pcu_record_dict[field]) - #else: - # raise Exception("No such field %s in pcu record dict" % field) - -class Transport: - TELNET = "telnet" - SSH = "ssh" - HTTP = "http" - HTTPS = "https" - IPAL = "ipal" - DRAC = "drac" - AMT = "amt" - - TELNET_TIMEOUT = 120 - - porttypemap = { - 5869 : DRAC, - 22 : SSH, - 23 : TELNET, - 443 : HTTPS, - 80 : HTTP, - 9100 : IPAL, - 16992 : AMT, - } - - def __init__(self, type, verbose): - self.type = type - self.verbose = verbose - self.transport = None - - def open(self, host, username=None, password=None, prompt="User Name"): - transport = None - - if self.type == self.TELNET: - transport = telnetlib.Telnet(host, timeout=self.TELNET_TIMEOUT) - transport.set_debuglevel(self.verbose) - if username is not None: - self.transport = transport - self.ifThenSend(prompt, username, ExceptionUsername) - - elif self.type == self.SSH: - if username is not None: - transport = pyssh.Ssh(username, host) - transport.set_debuglevel(self.verbose) - transport.open() - # TODO: have an ssh set_debuglevel() also... 
- else: - raise Exception("Username cannot be None for ssh transport.") - elif self.type == self.HTTP: - # NOTE: this does not work for all web-based services... - self.url = "http://%s:%d/" % (host,80) - uri = "%s:%d" % (host,80) - - # create authinfo - authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm() - authinfo.add_password (None, uri, username, password) - authhandler = urllib2.HTTPBasicAuthHandler( authinfo ) - - transport = urllib2.build_opener(authhandler) - else: - raise Exception("Unknown transport type: %s" % self.type) - - self.transport = transport - return True - - def close(self): - if self.type == self.TELNET: - self.transport.close() - elif self.type == self.SSH: - self.transport.close() - elif self.type == self.HTTP: - pass - else: - raise Exception("Unknown transport type %s" % self.type) - self.transport = None - - def write(self, msg): - return self.send(msg) - - def send(self, msg): - if self.transport == None: - raise ExceptionNoTransport("transport object is type None") - - return self.transport.write(msg) - - def sendPassword(self, password, prompt=None): - if self.type == self.TELNET: - if prompt == None: - self.ifThenSend("Password", password, ExceptionPassword) - else: - self.ifThenSend(prompt, password, ExceptionPassword) - elif self.type == self.SSH: - self.ifThenSend("password:", password, ExceptionPassword) - elif self.type == self.HTTP: - pass - else: - raise Exception("Unknown transport type: %s" % self.type) - - def sendHTTP(self, resource, data): - if self.verbose: - print "POSTing '%s' to %s" % (data,self.url + resource) - - try: - f = self.transport.open(self.url + resource ,data) - r = f.read() - if self.verbose: - print r - - except urllib2.URLError,err: - logger.info('Could not open http connection', err) - return "http transport error" - - return 0 - - def ifThenSend(self, expected, buffer, ErrorClass=ExceptionPrompt): - - if self.transport != None: - output = self.transport.read_until(expected, self.TELNET_TIMEOUT) - if 
output.find(expected) == -1: - print "OUTPUT: --%s--" % output - raise ErrorClass, "'%s' not found" % expected - else: - self.transport.write(buffer + "\r\n") - else: - raise ExceptionNoTransport("transport object is type None") - - def ifElse(self, expected, ErrorClass): - try: - self.transport.read_until(expected, self.TELNET_TIMEOUT) - except: - raise ErrorClass("Could not find '%s' within timeout" % expected) - -class PCUControl(PCUModel,PCURecord): - - """ - There are three cases: - 1) the pcu_record passed below includes port_status from an - external probe. - 2) the external probe failed, and the values are empty - 3) this call is made independent of port_status. - - In the first case, the first open port is used. - In the third case, the ports are tried in sequence. - - In this way, the port_status value serves only as an optimization, - because closed ports are avoided. The supported_ports value should - order ports by their preferred usage. - """ - - supported_ports = [] - - def __init__(self, plc_pcu_record, verbose, ignored=None): - PCUModel.__init__(self, plc_pcu_record) - PCURecord.__init__(self, plc_pcu_record) - - def reboot(self, node_port, dryrun): - - port_list = [] - # There are two sources of potential ports. Those that are open and - # those that are part of the PCU's supported_ports. - # I think we should start with supported_ports and then filter that - # by the open ports. 
- - port_list = self.supported_ports - - if hasattr(self, 'port_status') and self.port_status: - # get out the open ports - port_list = filter(lambda x: self.port_status[x] == "open" , self.port_status.keys()) - port_list = [ int(x) for x in port_list ] - # take only the open ports that are supported_ports - port_list = filter(lambda x: x in self.supported_ports, port_list) - if port_list == []: - raise ExceptionPort("No Open Port: No transport from open ports") - - print port_list - - ret = "No implementation for open ports on selected PCU model" - for port in port_list: - if port not in Transport.porttypemap: - continue - - type = Transport.porttypemap[port] - self.transport = Transport(type, verbose) - - print "checking for run_%s" % type - if hasattr(self, "run_%s" % type): - print "found run_%s" % type - fxn = getattr(self, "run_%s" % type) - ret = self.catcherror(fxn, node_port, dryrun) - if ret == 0: # NOTE: success!, so stop - break - else: - continue - - return ret - - def run(self, node_port, dryrun): - """ This function is to be defined by the specific PCU instance. 
""" - raise Exception("This function is not implemented") - pass - - #def reboot(self, node_port, dryrun): - - def catcherror(self, function, node_port, dryrun): - try: - return function(node_port, dryrun) - except ExceptionNotFound, err: - return "error: " + str(err) - except ExceptionPassword, err: - return "Password exception: " + str(err) - except ExceptionTimeout, err: - return "Timeout exception: " + str(err) - except ExceptionUsername, err: - return "No username prompt: " + str(err) - except ExceptionSequence, err: - return "Sequence error: " + str(err) - except ExceptionPrompt, err: - return "Prompt exception: " + str(err) - except ExceptionNoTransport, err: - return "No Transport: " + str(err) - except ExceptionPort, err: - return "No ports exception: " + str(err) - except socket.error, err: - return "socket error: timeout: " + str(err) - except urllib2.HTTPError, err: - return "HTTPError: " + str(err) - except urllib2.URLError, err: - return "URLError: " + str(err) - except EOFError, err: - self.transport.close() - import traceback - traceback.print_exc() - return "EOF connection reset" + str(err) - except Exception, err: - from monitor.common import email_exception - email_exception(self.host) - raise Exception(err) - -from pcucontrol.models import * - -def pcu_name(pcu): - if pcu['hostname'] is not None and pcu['hostname'] is not "": - return pcu['hostname'] - elif pcu['ip'] is not None and pcu['ip'] is not "": - return pcu['ip'] - else: - return None - def get_pcu_values(pcu_id): from monitor.database.info.model import FindbadPCURecord print "pcuid: %s" % pcu_id @@ -414,129 +108,6 @@ def reboot_policy(nodename, continue_probe, dryrun): print "return true" return True -class Unknown(PCUControl): - supported_ports = [22,23,80,443,5869,9100,16992] - -def model_to_object(modelname): - if modelname is None: - return ManualPCU - if "AMT" in modelname: - return IntelAMT - elif "BayTech" in modelname: - return BayTech - elif "HPiLO" in modelname: - return 
HPiLO - elif "IPAL" in modelname: - return IPAL - elif "APC" in modelname: - return APCControl - elif "DRAC" in modelname: - return DRAC - elif "WTI" in modelname: - return WTIIPS4 - elif "ePowerSwitch" in modelname: - return ePowerSwitchNew - elif "IPMI" in modelname: - return IPMI - elif "BlackBoxPSMaverick" in modelname: - return BlackBoxPSMaverick - elif "PM211MIP" in modelname: - return PM211MIP - elif "ManualPCU" in modelname: - return ManualPCU - else: - print "UNKNOWN model %s"%modelname - return Unknown - -def reboot_api(node, pcu): #, verbose, dryrun): - rb_ret = "" - - try: - modelname = pcu['model'] - if modelname: - # get object instance - instance = eval('%s(pcu, verbose)' % modelname) - # get pcu port - i = pcu['node_ids'].index(node['node_id']) - p = pcu['ports'][i] - # reboot - rb_ret = instance.reboot(p, False) - else: - rb_ret = "No modelname in PCU record." - # TODO: how to handle the weird, georgetown pcus, the drac faults, and ilo faults - except Exception, err: - rb_ret = str(err) - - return rb_ret - -def convert_oldmodelname_to_newmodelname(oldmodelname, pcu_id): - newmodelname = None - update = { 'AP79xx' : 'APCControl13p13', - 'Masterswitch' : 'APCControl13p13', - 'DS4-RPC' : 'BayTech', - 'IP-41x_IP-81x' : 'IPAL', - 'DRAC3' : 'DRAC', - 'DRAC4' : 'DRAC', - 'ePowerSwitch' : 'ePowerSwitchOld', - 'ilo2' : 'HPiLO', - 'ilo1' : 'HPiLO', - 'PM211-MIP' : 'PM211MIP', - 'AMT2.5' : 'IntelAMT', - 'AMT3.0' : 'IntelAMT', - 'WTI_IPS-4' : 'WTIIPS4', - 'unknown' : 'ManualPCU', - 'DRAC5' : 'DRAC', - 'ipmi' : 'OpenIPMI', - 'bbsemaverick' : 'BlackBoxPSMaverick', - 'manualadmin' : 'ManualPCU', - } - - if oldmodelname in update: - newmodelname = update[oldmodelname] - else: - newmodelname = oldmodelname - - if pcu_id in [1102,1163,1055,1111,1231,1113,1127,1128,1148]: - newmodelname = 'APCControl12p3' - elif pcu_id in [1110,86]: - newmodelname = 'APCControl1p4' - elif pcu_id in [1221,1225,1220,1192]: - newmodelname = 'APCControl121p3' - elif pcu_id in 
[1173,1240,47,1363,1405,1401,1372,1371]: - newmodelname = 'APCControl121p1' - elif pcu_id in [1056,1237,1052,1209,1002,1008,1013,1022]: - newmodelname = 'BayTechCtrlC' - elif pcu_id in [93]: - newmodelname = 'BayTechRPC3NC' - elif pcu_id in [1057]: - newmodelname = 'BayTechCtrlCUnibe' - elif pcu_id in [1012]: - newmodelname = 'BayTechRPC16' - elif pcu_id in [1089, 1071, 1046, 1035, 1118]: - newmodelname = 'ePowerSwitchNew' - - return newmodelname - -def reboot_test_new(nodename, values, verbose, dryrun): - rb_ret = "" - if 'plc_pcu_stats' in values: - values.update(values['plc_pcu_stats']) - - try: - modelname = convert_oldmodelname_to_newmodelname(values['model'], values['pcu_id']) - if modelname: - object = eval('%s(values, verbose)' % modelname) - rb_ret = object.reboot(values[nodename], dryrun) - else: - rb_ret = "Not_Run" - # TODO: how to handle the weird, georgetown pcus, the drac faults, and ilo faults - except ExceptionPort, err: - rb_ret = str(err) - except NameError, err: - rb_ret = str(err) - - return rb_ret - def main(): logger.setLevel(logging.DEBUG) ch = logging.StreamHandler() diff --git a/monitor/util/command.py b/monitor/util/command.py index e3e81ca..e5663c3 100644 --- a/monitor/util/command.py +++ b/monitor/util/command.py @@ -4,10 +4,12 @@ import subprocess import signal import time import traceback +import fcntl DEBUG= 0 class ExceptionTimeout(Exception): pass +class ExceptionReadTimeout(Exception): pass COMMAND_TIMEOUT = 60 ssh_options = { 'StrictHostKeyChecking':'no', 'BatchMode':'yes', @@ -15,15 +17,47 @@ ssh_options = { 'StrictHostKeyChecking':'no', 'ConnectTimeout':'%s' % COMMAND_TIMEOUT} class Sopen(subprocess.Popen): - def kill(self, signal = signal.SIGTERM): - os.kill(self.pid, signal) + def kill(self, sig = signal.SIGTERM): + try: + # NOTE: this also kills parent... so doesn't work like I want. + # NOTE: adding 'exec' before the cmd removes the extra sh, and + # partially addresses this problem. 
+ #os.killpg(os.getpgid(self.pid), signal.SIGKILL) + os.kill(self.pid, sig) + except OSError: + # no such process, due to it already exiting... + pass + + +def read_t(stream, count=1, timeout=COMMAND_TIMEOUT*2): + if count == 1: + retstr = "" + + while True: + lin, lout, lerr = select([stream], [], [], timeout) + if len(lin) == 0: + print "timeout!" + raise ExceptionReadTimeout("TIMEOUT reading from command") -def read_t(stream, count, timeout=COMMAND_TIMEOUT*2): - lin, lout, lerr = select([stream], [], [], timeout) - if len(lin) == 0: - raise ExceptionTimeout("TIMEOUT Running: %s" % cmd) + try: + outbytes = stream.read(count) + except IOError, err: + print 'no content yet.' + # due to no content. + # the select timeout should catch this. + continue - return stream.read(count) + if not outbytes: + break + retstr += outbytes + + return retstr + else: + lin, lout, lerr = select([stream], [], [], timeout) + if len(lin) == 0: + raise ExceptionReadTimeout("TIMEOUT reading from command") + + return stream.read(count) class CMD: def __init__(self): @@ -31,15 +65,18 @@ class CMD: def run_noexcept(self, cmd, timeout=COMMAND_TIMEOUT*2): - #print "CMD.run_noexcept(%s)" % cmd try: return CMD.run(self,cmd,timeout) except ExceptionTimeout: print traceback.print_exc() - return ("", "SCRIPTTIMEOUT") - except: + return ("", "ScriptTimeout") + except ExceptionReadTimeout: + print traceback.print_exc() + return ("", "RunningScriptTimeout") + except Exception, err: from monitor.common import email_exception email_exception() + return ("", str(err)) def system(self, cmd, timeout=COMMAND_TIMEOUT*2): (o,e) = self.run(cmd, timeout) @@ -51,16 +88,13 @@ class CMD: def run(self, cmd, timeout=COMMAND_TIMEOUT*2): - #print "CMD.run(%s)" % cmd s = Sopen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) self.s = s (f_in, f_out, f_err) = (s.stdin, s.stdout, s.stderr) - #print "calling select(%s)" % timeout lout, lin, lerr = select([f_out], [], 
[f_err], timeout) - #print "TIMEOUT!!!!!!!!!!!!!!!!!!!" if len(lin) == 0 and len(lout) == 0 and len(lerr) == 0: # Reached a timeout! Nuke process so it does not hang. - #print "KILLING" + print "TIMEOUT!!!!!!!!!!!!!!!!!!!" s.kill(signal.SIGKILL) raise ExceptionTimeout("TIMEOUT Running: %s" % cmd) else: @@ -71,28 +105,26 @@ class CMD: o_value = "" e_value = "" - o_value = f_out.read() + #o_value = f_out.read() + flags = fcntl.fcntl(f_out, fcntl.F_GETFL) + fcntl.fcntl(f_out, fcntl.F_SETFL, flags | os.O_NONBLOCK) + + try: + o_value = read_t(f_out,1,30) + except ExceptionReadTimeout: + s.kill(signal.SIGKILL) + raise ExceptionReadTimeout("TIMEOUT: failed to read from cmd: %s" % cmd) + e_value = f_err.read() - #print "striping output" o_value = o_value.strip() e_value = e_value.strip() - #print "OUTPUT -%s-%s-" % (o_value, e_value) - - #print "closing files" f_out.close() f_in.close() f_err.close() - try: - #print "s.kill()" - s.kill() - #print "after s.kill()" - except OSError: - # no such process, due to it already exiting... - pass + s.kill(signal.SIGKILL) - #print o_value, e_value return (o_value, e_value) def runargs(self, args, timeout=COMMAND_TIMEOUT*2): @@ -117,11 +149,7 @@ class CMD: f_out.close() f_in.close() f_err.close() - try: - s.kill() - except OSError: - # no such process, due to it already exiting... - pass + s.kill(signal.SIGKILL) return (o_value, e_value) @@ -164,17 +192,9 @@ class SSH(CMD): return CMD.run_noexcept(self, cmd) def run_noexcept2(self, cmd, timeout=COMMAND_TIMEOUT*2): - cmd = "ssh -p %s %s %s@%s %s" % (self.port, self.__options_to_str(), + cmd = "exec ssh -p %s %s %s@%s %s" % (self.port, self.__options_to_str(), self.user, self.host, cmd) - #print "SSH.run_noexcept2(%s)" % cmd r = CMD.run_noexcept(self, cmd, timeout) - - # XXX: this may be resulting in deadlocks... not sure. 
- #if self.s.returncode is None: - # #self.s.kill() - # self.s.kill(signal.SIGKILL) - # self.s.wait() - # self.ret = self.s.returncode self.ret = -1 return r diff --git a/nodeinfo.py b/nodeinfo.py index 9afed5c..3248707 100755 --- a/nodeinfo.py +++ b/nodeinfo.py @@ -8,7 +8,7 @@ from monitor import util from monitor import parser as parsermodule from monitor import database -from pcucontrol import reboot +from monitor import reboot import time from monitor.model import * diff --git a/nodequery.py b/nodequery.py index 781e841..7a53df0 100755 --- a/nodequery.py +++ b/nodequery.py @@ -13,7 +13,6 @@ import time import re import string -from pcucontrol import reboot from monitor.wrapper import plc, plccache api = plc.getAuthAPI() @@ -383,8 +382,6 @@ def main(): #fbnodes = FindbadNodeRecord.select(FindbadNodeRecord.q.hostname, orderBy='date_checked',distinct=True).reversed() fb = None - #reboot.fb = fbpcu - if config.nodelist: nodelist = util.file.getListFromFile(config.nodelist) else: diff --git a/pcubad.py b/pcubad.py index 181f001..13fce72 100755 --- a/pcubad.py +++ b/pcubad.py @@ -7,7 +7,7 @@ import time from datetime import datetime,timedelta from monitor import database -from pcucontrol import reboot +from monitor import reboot from monitor import parser as parsermodule from monitor import config from monitor.database.info.model import HistoryPCURecord, FindbadPCURecord diff --git a/pcucontrol/reboot.py b/pcucontrol/reboot.py index b762d89..a0ba1f2 100755 --- a/pcucontrol/reboot.py +++ b/pcucontrol/reboot.py @@ -11,13 +11,12 @@ import urllib2 import urllib import threading, popen2 import array, struct -from monitor.wrapper import plc import base64 from subprocess import PIPE, Popen import pcucontrol.transports.ssh.pxssh as pxssh import pcucontrol.transports.ssh.pexpect as pexpect import socket -from monitor.util import command + # Use our versions of telnetlib and pyssh @@ -25,8 +24,6 @@ sys.path.insert(0, os.path.dirname(sys.argv[0])) import 
pcucontrol.transports.telnetlib as telnetlib sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh") import pcucontrol.transports.pyssh as pyssh -from monitor import config - # Event class ID from pcu events #NODE_POWER_CONTROL = 3 @@ -35,7 +32,6 @@ from monitor import config #MONITOR_USER_ID = 11142 import logging -logger = logging.getLogger("monitor") verbose = 1 #dryrun = 0; @@ -206,7 +202,7 @@ class Transport: print r except urllib2.URLError,err: - logger.info('Could not open http connection', err) + print 'Could not open http connection', err return "http transport error" return 0 @@ -345,73 +341,6 @@ def pcu_name(pcu): else: return None -def get_pcu_values(pcu_id): - from monitor.database.info.model import FindbadPCURecord - print "pcuid: %s" % pcu_id - try: - pcurec = FindbadPCURecord.get_latest_by(plc_pcuid=pcu_id).first() - if pcurec: - values = pcurec.to_dict() - else: - values = None - except: - values = None - - return values - -def reboot(nodename): - return reboot_policy(nodename, True, False) - -def reboot_str(nodename): - global verbose - continue_probe = True - dryrun=False - - pcu = plc.getpcu(nodename) - if not pcu: - logger.debug("no pcu for %s" % nodename) - print "no pcu for %s" % nodename - return False # "%s has no pcu" % nodename - - values = get_pcu_values(pcu['pcu_id']) - if values == None: - logger.debug("No values for pcu probe %s" % nodename) - print "No values for pcu probe %s" % nodename - return False #"no info for pcu_id %s" % pcu['pcu_id'] - - # Try the PCU first - logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model'])) - - ret = reboot_test_new(nodename, values, verbose, dryrun) - return ret - -def reboot_policy(nodename, continue_probe, dryrun): - global verbose - - pcu = plc.getpcu(nodename) - if not pcu: - logger.debug("no pcu for %s" % nodename) - print "no pcu for %s" % nodename - return False # "%s has no pcu" % nodename - - values = get_pcu_values(pcu['pcu_id']) - if values == None: - logger.debug("No values 
for pcu probe %s" % nodename) - print "No values for pcu probe %s" % nodename - return False #"no info for pcu_id %s" % pcu['pcu_id'] - - # Try the PCU first - logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model'])) - - ret = reboot_test_new(nodename, values, verbose, dryrun) - - if ret != 0: - print ret - return False - else: - print "return true" - return True - class Unknown(PCUControl): supported_ports = [22,23,80,443,5869,9100,16992] @@ -446,7 +375,7 @@ def model_to_object(modelname): print "UNKNOWN model %s"%modelname return Unknown -def reboot_api(node, pcu): #, verbose, dryrun): +def reboot_api(node, pcu): rb_ret = "" try: @@ -536,36 +465,7 @@ def reboot_test_new(nodename, values, verbose, dryrun): return rb_ret def main(): - logger.setLevel(logging.DEBUG) - ch = logging.StreamHandler() - ch.setLevel(logging.DEBUG) - formatter = logging.Formatter('LOGGER - %(message)s') - ch.setFormatter(formatter) - logger.addHandler(ch) - - try: - if "test" in sys.argv: - dryrun = True - else: - dryrun = False - - for node in sys.argv[1:]: - if node == "test": continue - - print "Rebooting %s" % node - if reboot_policy(node, True, dryrun): - print "success" - else: - print "failed" - except Exception, err: - import traceback; traceback.print_exc() - from monitor.common import email_exception - email_exception(node) - print err + print "this does not work." 
if __name__ == '__main__': - logger = logging.getLogger("monitor") main() - f = open("/tmp/rebootlog", 'a') - f.write("reboot %s\n" % sys.argv) - f.close() diff --git a/sitebad.py b/sitebad.py index f8524f0..15c8f1d 100755 --- a/sitebad.py +++ b/sitebad.py @@ -7,7 +7,6 @@ import time from datetime import datetime,timedelta from monitor import database -from pcucontrol import reboot from monitor import parser as parsermodule from monitor import config from monitor.database.info.model import HistorySiteRecord, FindbadNodeRecord, session diff --git a/siteinfo.py b/siteinfo.py index cfce458..6fe6496 100755 --- a/siteinfo.py +++ b/siteinfo.py @@ -4,7 +4,6 @@ from monitor.wrapper import plc api = plc.getAuthAPI() from monitor import database -from pcucontrol import reboot import time from monitor.common import * -- 2.47.0